From 9cf281edfa50436ac7d39cd90c04ee086c5b5468 Mon Sep 17 00:00:00 2001 From: mlaier Date: Thu, 17 Jun 2004 16:52:25 +0000 Subject: [PATCH 1/7] Import OpenBSD stable branch. --- sys/contrib/pf/net/pf.c | 19 ++++++++----------- sys/contrib/pf/net/pf_ioctl.c | 6 +++--- sys/contrib/pf/net/pf_norm.c | 10 +++++----- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 82113ecbe1c4..a633832c20cb 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.433 2004/03/26 22:20:57 dhartmei Exp $ */ +/* $OpenBSD: pf.c,v 1.433.2.1 2004/04/30 21:46:33 brad Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -4898,17 +4898,14 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; - mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); - if (mtag == NULL) { - struct m_tag *mtag; + if (m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL) != NULL) + goto bad; + mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; + m_tag_prepend(m0, mtag); - mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); - } - - if (oifp != ifp && mtag == NULL) { + if (oifp != ifp) { if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) goto bad; else if (m0 == NULL) diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index dd25ce2babe3..58e454a06cc0 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.112 2004/03/22 04:54:18 mcbride Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.112.2.1 2004/04/30 21:43:30 brad Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -81,7 +81,7 @@ void pfattach(int); int pfopen(dev_t, int, int, struct proc *); int pfclose(dev_t, int, int, struct proc *); struct pf_pool *pf_get_pool(char *, char *, u_int32_t, - u_int8_t, u_int8_t, u_int8_t, u_int8_t, u_int8_t); + u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); int pf_get_ruleset_number(u_int8_t); void pf_init_ruleset(struct pf_ruleset *); void pf_mv_pool(struct pf_palist *, struct pf_palist *); @@ -198,7 +198,7 @@ pfclose(dev_t dev, int flags, int fmt, struct proc *p) struct pf_pool * pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, - u_int8_t rule_action, u_int8_t rule_number, u_int8_t r_last, + u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) { struct pf_ruleset *ruleset; diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index d2c6456d6537..ec56dce9e3f4 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.80 2004/03/09 21:44:41 mcbride Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */ /* * Copyright 2001 Niels Provos @@ -1362,8 +1362,8 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, } /* FALLTHROUGH */ default: - hlen -= opt[1]; - opt += opt[1]; + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); break; } } @@ -1473,8 +1473,8 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } /* FALLTHROUGH */ default: - hlen -= opt[1]; - opt += opt[1]; + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); break; } } From d86bbc28a541e6cc611f3f6e8efe537eebf24e48 Mon Sep 17 00:00:00 2001 From: mlaier Date: Sat, 17 Jul 2004 17:06:41 +0000 Subject: [PATCH 2/7] Import from OpenBSD stable branch --- sys/contrib/pf/net/pf.c | 95 +++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index a633832c20cb..8d6c01d2d942 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.433.2.1 2004/04/30 21:46:33 brad Exp $ */ +/* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -4822,12 +4822,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s) { struct mbuf *m0, *m1; + struct m_tag *mtag; struct route iproute; struct route *ro; struct sockaddr_in *dst; struct ip *ip; struct ifnet *ifp = NULL; - struct m_tag *mtag; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; @@ -4836,18 +4836,30 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); - if (r->rt == PF_DUPTO) { - m0 = *m; - mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); - if (mtag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); + if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { + if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == + NULL) { + m0 = *m; + *m = NULL; + goto bad; } - m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT); - if (m0 == NULL) + *(char *)(mtag + 1) = 1; + m_tag_prepend(*m, mtag); + } else { + if (*(char *)(mtag + 1) > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + (*(char *)(mtag + 1))++; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; + if ((mtag = m_tag_copy(mtag)) == NULL) + goto bad; + m_tag_prepend(m0, mtag); } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; @@ -4894,17 +4906,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } } - if (ifp == NULL) goto bad; - if (m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL) != NULL) - goto bad; - mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); - if (oifp != ifp) { if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) goto bad; @@ -5029,18 +5033,30 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); - if (r->rt == PF_DUPTO) { - m0 = *m; - mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); - if (mtag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); + if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { + if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == + NULL) { + m0 = *m; + *m = NULL; + goto bad; } - m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT); - if (m0 == NULL) + *(char *)(mtag + 1) = 1; + m_tag_prepend(*m, mtag); + } else { + if (*(char *)(mtag + 1) > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + (*(char *)(mtag + 1))++; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; + if ((mtag = m_tag_copy(mtag)) == NULL) + goto bad; + m_tag_prepend(m0, mtag); } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; @@ -5083,22 +5099,17 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } - if (ifp == NULL) goto bad; if (oifp != ifp) { - mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); - if (mtag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); - if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) - goto bad; - else if (m0 == NULL) - goto done; - } + if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < sizeof(struct ip6_hdr)) + panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + ip6 = mtod(m0, struct ip6_hdr *); } /* From 80669cee06df9d0b332229927208520eed0bd082 Mon Sep 17 00:00:00 2001 From: mlaier Date: Thu, 12 Aug 2004 13:46:21 +0000 Subject: [PATCH 3/7] Import a couple of fixes from OpenBSD-current, which did not make -stable in OpenBSD for various reasons. Discussed with: yongari --- sys/contrib/pf/net/pf.c | 3 +-- sys/contrib/pf/net/pf_if.c | 4 ++++ sys/contrib/pf/net/pf_norm.c | 14 +++++++++----- sys/contrib/pf/net/pfvar.h | 3 +++ 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 8d6c01d2d942..a2db3d48ebf7 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,4 +1,5 @@ /* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */ +/* add $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -113,8 +114,6 @@ void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); void pf_print_state(struct pf_state *); void pf_print_flags(u_int8_t); -u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, - u_int8_t); void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index cdc6e36962fd..a3d5f4175f01 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -1,4 +1,5 @@ /* $OpenBSD: pf_if.c,v 1.11 2004/03/15 11:38:23 cedric Exp $ */ +/* add $OpenBSD: pf_if.c,v 1.19 2004/08/11 12:06:44 henning Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -42,6 +43,7 @@ #include #include +#include #include #include @@ -410,6 +412,8 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) af = ia->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; + if (!(ia->ifa_flags & IFA_ROUTE)) + continue; if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index ec56dce9e3f4..3e5b40281db0 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1,4 +1,5 @@ /* $OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */ +/* add $OpenBSD: pf_norm.c,v 1.87 2004/05/11 07:34:11 dhartmei Exp $ */ /* * Copyright 2001 Niels Provos @@ -114,7 +115,6 @@ struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, struct pf_frent *, int); struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); -u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t); int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int); @@ -976,8 +976,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) if (r->min_ttl && h->ip_ttl < r->min_ttl) h->ip_ttl = r->min_ttl; - if (r->rule_flag & PFRULE_RANDOMID) + if (r->rule_flag & PFRULE_RANDOMID) { + u_int16_t ip_id = h->ip_id; + h->ip_id = ip_randomid(); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } return (PF_PASS); @@ -1273,13 +1277,13 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, th->th_x2 = 0; nv = *(u_int16_t *)(&th->th_ack + 1); - th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv); + th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); rewrite = 1; } /* Remove urgent pointer, if TH_URG is not set */ if (!(flags & TH_URG) && th->th_urp) { - th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0); + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); th->th_urp = 0; rewrite = 1; } @@ -1523,7 +1527,7 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, mss = (u_int16_t *)(optp + 2); if ((ntohs(*mss)) > r->max_mss) { th->th_sum = pf_cksum_fixup(th->th_sum, - *mss, htons(r->max_mss)); + *mss, htons(r->max_mss), 0); *mss = htons(r->max_mss); rewrite = 1; } diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index d3dda46578e4..851d39591960 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -1,4 +1,5 @@ /* $OpenBSD: pfvar.h,v 1.187 2004/03/22 04:54:18 mcbride Exp $ */ +/* add $OpenBSD: pfvar.h,v 1.194 2004/05/11 07:34:11 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1328,6 +1329,8 @@ extern struct pf_ruleset *pf_find_or_create_ruleset( char[PF_RULESET_NAME_SIZE]); extern void pf_remove_if_empty_ruleset( struct pf_ruleset *); +extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); extern struct ifnet *sync_ifp; extern struct pf_rule pf_default_rule; From b1f9b6b2a2d5860b44ff6efac4bda999a1801fac Mon Sep 17 00:00:00 2001 From: mlaier Date: Thu, 12 Aug 2004 14:09:56 +0000 Subject: [PATCH 4/7] Import a fix from the OpenBSD-stable branch, that slipped by my previous import. Sorry. --- sys/contrib/pf/net/pf_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index 58e454a06cc0..f6b7ee967dd9 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.112.2.1 2004/04/30 21:43:30 brad Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.112.2.2 2004/07/24 18:28:12 brad Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1236,7 +1236,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { - pf_rm_rule(NULL, newrule); + if (newrule != NULL) + pf_rm_rule(NULL, newrule); error = EINVAL; splx(s); break; From a0d335db265c273703bfe7e7a78575f766921ee7 Mon Sep 17 00:00:00 2001 From: mlaier Date: Tue, 3 May 2005 16:34:36 +0000 Subject: [PATCH 5/7] Import pf from OpenBSD 3.7 (OPENBSD_3_7 as of today) --- sys/contrib/pf/net/if_pflog.c | 8 +- sys/contrib/pf/net/if_pflog.h | 9 +- sys/contrib/pf/net/if_pfsync.c | 306 ++++++-- sys/contrib/pf/net/if_pfsync.h | 57 +- sys/contrib/pf/net/pf.c | 1215 ++++++++++++++++++++++---------- sys/contrib/pf/net/pf_if.c | 135 ++-- sys/contrib/pf/net/pf_ioctl.c | 918 ++++++++++++++---------- sys/contrib/pf/net/pf_norm.c | 371 +++++++++- sys/contrib/pf/net/pf_osfp.c | 6 +- sys/contrib/pf/net/pf_table.c | 250 +++++-- sys/contrib/pf/net/pfvar.h | 223 ++++-- 11 files changed, 2404 insertions(+), 1094 deletions(-) diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c index 3b93226a439e..41e1e6564713 100644 --- a/sys/contrib/pf/net/if_pflog.c +++ b/sys/contrib/pf/net/if_pflog.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.c,v 1.11 2003/12/31 11:18:25 cedric Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.12 2004/05/19 17:50:51 dhartmei Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and @@ -197,11 +197,9 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); - if (ruleset != NULL) - memcpy(hdr.ruleset, ruleset->name, + if (ruleset != NULL && ruleset->anchor != NULL) + strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); - - } hdr.dir = dir; diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index e4e603e37303..7a43b10c215e 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.h,v 1.10 2004/03/19 04:52:04 frantzen Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.11 2004/05/19 17:50:51 dhartmei Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. @@ -31,10 +31,7 @@ struct pflog_softc { struct ifnet sc_if; /* the interface */ }; -/* XXX keep in sync with pfvar.h */ -#ifndef PF_RULESET_NAME_SIZE -#define PF_RULESET_NAME_SIZE 16 -#endif +#define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { u_int8_t length; @@ -42,7 +39,7 @@ struct pfloghdr { u_int8_t action; u_int8_t reason; char ifname[IFNAMSIZ]; - char ruleset[PF_RULESET_NAME_SIZE]; + char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; u_int8_t dir; diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index e4840ff32d97..d0f56d096a73 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.26 2004/03/28 18:14:20 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -37,11 +37,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #ifdef INET #include @@ -58,6 +61,11 @@ #include #endif /* INET6 */ +#include "carp.h" +#if NCARP > 0 +extern int carp_suppress_preempt; +#endif + #include #include @@ -72,7 +80,6 @@ int pfsyncdebug; #endif struct pfsync_softc pfsyncif; -int pfsync_sync_ok; struct pfsyncstats pfsyncstats; void pfsyncattach(int); @@ -91,10 +98,8 @@ void pfsync_send_bus(struct pfsync_softc *, u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulkfail(void *); +int pfsync_sync_ok; extern int ifqmaxlen; -extern struct timeval time; -extern struct timeval mono_time; -extern int hz; void pfsyncattach(int npfsync) @@ -108,6 +113,7 @@ pfsyncattach(int npfsync) pfsyncif.sc_statep.s = NULL; pfsyncif.sc_statep_net.s = NULL; pfsyncif.sc_maxupdates = 128; + pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; pfsyncif.sc_ureq_received = 0; pfsyncif.sc_ureq_sent = 0; @@ -193,6 +199,9 @@ pfsync_insert_net_state(struct pfsync_state *sp) st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + r->states++; + /* fill in the rest of the state entry */ pf_state_host_ntoh(&sp->lan, &st->lan); pf_state_host_ntoh(&sp->gwy, &st->gwy); @@ -202,8 +211,8 @@ pfsync_insert_net_state(struct pfsync_state *sp) pf_state_peer_ntoh(&sp->dst, &st->dst); bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); - st->creation = ntohl(sp->creation) + time.tv_sec; - st->expire = ntohl(sp->expire) + time.tv_sec; + st->creation = time_second - ntohl(sp->creation); + st->expire = ntohl(sp->expire) + time_second; st->af = sp->af; st->proto = sp->proto; @@ -214,11 +223,13 @@ pfsync_insert_net_state(struct pfsync_state *sp) bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; - st->sync_flags = sp->sync_flags | PFSTATE_FROMSYNC; + st->sync_flags = PFSTATE_FROMSYNC; if (pf_insert_state(kif, st)) { pfi_maybe_destroy(kif); + /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ + r->states--; pool_put(&pf_state_pl, st); return (EINVAL); } @@ -241,7 +252,7 @@ pfsync_input(struct mbuf *m, ...) struct pfsync_state_bus *bus; struct in_addr src; struct mbuf *mp; - int iplen, action, error, i, s, count, offp; + int iplen, action, error, i, s, count, offp, sfail, stale = 0; pfsyncstats.pfsyncs_ipackets++; @@ -297,6 +308,7 @@ pfsync_input(struct mbuf *m, ...) switch (action) { case PFSYNC_ACT_CLR: { + struct pf_state *nexts; struct pfi_kif *kif; u_int32_t creatorid; if ((mp = m_pulldown(m, iplen + sizeof(*ph), @@ -309,9 +321,13 @@ pfsync_input(struct mbuf *m, ...) s = splsoftnet(); if (cp->ifname[0] == '\0') { - RB_FOREACH(st, pf_state_tree_id, &tree_id) { - if (st->creatorid == creatorid) + for (st = RB_MIN(pf_state_tree_id, &tree_id); + st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); + if (st->creatorid == creatorid) { st->timeout = PFTM_PURGE; + pf_purge_expired_state(st); + } } } else { kif = pfi_lookup_if(cp->ifname); @@ -322,13 +338,16 @@ pfsync_input(struct mbuf *m, ...) splx(s); goto done; } - RB_FOREACH(st, pf_state_tree_lan_ext, - &kif->pfik_lan_ext) { - if (st->creatorid == creatorid) + for (st = RB_MIN(pf_state_tree_lan_ext, + &kif->pfik_lan_ext); st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_lan_ext, + &kif->pfik_lan_ext, st); + if (st->creatorid == creatorid) { st->timeout = PFTM_PURGE; + pf_purge_expired_state(st); + } } } - pf_purge_expired_states(); splx(s); break; @@ -376,6 +395,8 @@ pfsync_input(struct mbuf *m, ...) s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { + int flags = PFSYNC_FLAG_STALE; + /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || @@ -397,12 +418,73 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } + sfail = 0; + if (st->proto == IPPROTO_TCP) { + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. + */ + if (st->src.state > sp->src.state && + (st->src.state < PF_TCPS_PROXY_SRC || + sp->src.state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (SEQ_GT(st->src.seqlo, + ntohl(sp->src.seqlo))) + sfail = 3; + else if (st->dst.state > sp->dst.state) { + /* There might still be useful + * information about the src state here, + * so import that part of the update, + * then "fail" so we send the updated + * state back to the peer who is missing + * our what we know. */ + pf_state_peer_ntoh(&sp->src, &st->src); + /* XXX do anything with timeouts? */ + sfail = 7; + flags = 0; + } else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) + sfail = 4; + } else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > sp->src.state) + sfail = 5; + else if ( st->dst.state > sp->dst.state) + sfail = 6; + } + if (sfail) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: %s stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", + (sfail < 7 ? "ignoring" + : "partial"), sfail, + betoh64(st->id), + ntohl(st->creatorid)); + pfsyncstats.pfsyncs_badstate++; + + if (!(sp->sync_flags & PFSTATE_STALE)) { + /* we have a better state, send it */ + if (sc->sc_mbuf != NULL && !stale) + pfsync_sendout(sc); + stale++; + if (!st->sync_flags) + pfsync_pack_state( + PFSYNC_ACT_UPD, st, flags); + } + continue; + } pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = ntohl(sp->expire) + time.tv_sec; + st->expire = ntohl(sp->expire) + time_second; st->timeout = sp->timeout; - } + if (stale && sc->sc_mbuf != NULL) + pfsync_sendout(sc); splx(s); break; /* @@ -427,15 +509,10 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - /* - * XXX - * pf_purge_expired_states() is expensive, - * we really want to purge the state directly. - */ st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; + pf_purge_expired_state(st); } - pf_purge_expired_states(); splx(s); break; case PFSYNC_ACT_UPD_C: { @@ -468,17 +545,71 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&key); if (st == NULL) { /* We don't have this state. Ask for it. */ - pfsync_request_update(up, &src); + error = pfsync_request_update(up, &src); + if (error == ENOMEM) { + splx(s); + goto done; + } update_requested = 1; pfsyncstats.pfsyncs_badstate++; continue; } + sfail = 0; + if (st->proto == IPPROTO_TCP) { + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. + */ + if (st->src.state > up->src.state && + (st->src.state < PF_TCPS_PROXY_SRC || + up->src.state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (st->dst.state > up->dst.state) + sfail = 2; + else if (SEQ_GT(st->src.seqlo, + ntohl(up->src.seqlo))) + sfail = 3; + else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) + sfail = 4; + } else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > up->src.state) + sfail = 5; + else if (st->dst.state > up->dst.state) + sfail = 6; + } + if (sfail) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: ignoring stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", sfail, + betoh64(st->id), + ntohl(st->creatorid)); + pfsyncstats.pfsyncs_badstate++; + + /* we have a better state, send it out */ + if ((!stale || update_requested) && + sc->sc_mbuf != NULL) { + pfsync_sendout(sc); + update_requested = 0; + } + stale++; + if (!st->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, st, + PFSYNC_FLAG_STALE); + continue; + } pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = ntohl(up->expire) + time.tv_sec; + st->expire = ntohl(up->expire) + time_second; st->timeout = up->timeout; } - if (update_requested) + if ((update_requested || stale) && sc->sc_mbuf) pfsync_sendout(sc); splx(s); break; @@ -501,15 +632,10 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - /* - * XXX - * pf_purge_expired_states() is expensive, - * we really want to purge the state directly. - */ st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; + pf_purge_expired_state(st); } - pf_purge_expired_states(); splx(s); break; case PFSYNC_ACT_INS_F: @@ -524,7 +650,6 @@ pfsync_input(struct mbuf *m, ...) } s = splsoftnet(); - /* XXX send existing. pfsync_pack_state should handle this. */ if (sc->sc_mbuf != NULL) pfsync_sendout(sc); for (i = 0, @@ -534,7 +659,7 @@ pfsync_input(struct mbuf *m, ...) key.creatorid = rup->creatorid; if (key.id == 0 && key.creatorid == 0) { - sc->sc_ureq_received = mono_time.tv_sec; + sc->sc_ureq_received = time_uptime; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received " "bulk update request\n"); @@ -546,7 +671,9 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); + if (!st->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, + st, 0); } } if (sc->sc_mbuf != NULL) @@ -574,12 +701,16 @@ pfsync_input(struct mbuf *m, ...) "update start\n"); break; case PFSYNC_BUS_END: - if (mono_time.tv_sec - ntohl(bus->endtime) >= + if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; timeout_del(&sc->sc_bulkfail_tmo); +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_suppress_preempt--; +#endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " @@ -643,8 +774,9 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); if (sc->sc_sync_ifp) - strlcpy(pfsyncr.pfsyncr_syncif, + strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_ifp->if_xname, IFNAMSIZ); + pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) return (error); @@ -655,11 +787,17 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) return (error); + if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) + sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + else + sc->sc_sync_peer.s_addr = + pfsyncr.pfsyncr_syncpeer.s_addr; + if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; - if (pfsyncr.pfsyncr_syncif[0] == 0) { + if (pfsyncr.pfsyncr_syncdev[0] == 0) { sc->sc_sync_ifp = NULL; if (sc->sc_mbuf_net != NULL) { /* Don't keep stale pfsync packets around. */ @@ -669,12 +807,15 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_statep_net.s = NULL; splx(s); } + if (imo->imo_num_memberships > 0) { + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } break; } - if ((sifp = ifunit(pfsyncr.pfsyncr_syncif)) == NULL) + + if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); - else if (sifp == sc->sc_sync_ifp) - break; s = splnet(); if (sifp->if_mtu < sc->sc_if.if_mtu || @@ -691,12 +832,21 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) imo->imo_multicast_ifp = NULL; } - if (sc->sc_sync_ifp) { + if (sc->sc_sync_ifp && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { struct in_addr addr; + if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { + sc->sc_sync_ifp = NULL; + splx(s); + return (EADDRNOTAVAIL); + } + addr.s_addr = INADDR_PFSYNC_GROUP; + if ((imo->imo_membership[0] = in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { + sc->sc_sync_ifp = NULL; splx(s); return (ENOBUFS); } @@ -704,14 +854,25 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) imo->imo_multicast_ifp = sc->sc_sync_ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; + } + if (sc->sc_sync_ifp || + sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { /* Request a full state table update. */ - sc->sc_ureq_sent = mono_time.tv_sec; + sc->sc_ureq_sent = time_uptime; +#if NCARP > 0 + if (pfsync_sync_ok) + carp_suppress_preempt++; +#endif pfsync_sync_ok = 0; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); - pfsync_request_update(NULL, NULL); + error = pfsync_request_update(NULL, NULL); + if (error == ENOMEM) { + splx(s); + return (ENOMEM); + } pfsync_sendout(sc); } splx(s); @@ -808,7 +969,7 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) } int -pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) +pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) { struct ifnet *ifp = &pfsyncif.sc_if; struct pfsync_softc *sc = ifp->if_softc; @@ -825,7 +986,8 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) * If a packet falls in the forest and there's nobody around to * hear, does it make a sound? */ - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL) { + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { /* Don't leave any stale pfsync packets hanging around. */ if (sc->sc_mbuf != NULL) { m_freem(sc->sc_mbuf); @@ -879,9 +1041,9 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) } } - secs = time.tv_sec; + secs = time_second; - st->pfsync_time = mono_time.tv_sec; + st->pfsync_time = time_uptime; TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); @@ -924,7 +1086,8 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) sp->allow_opts = st->allow_opts; sp->timeout = st->timeout; - sp->sync_flags = st->sync_flags & PFSTATE_NOSYNC; + if (flags & PFSYNC_FLAG_STALE) + sp->sync_flags |= PFSTATE_STALE; } pf_state_peer_hton(&st->src, &sp->src); @@ -936,7 +1099,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) sp->expire = htonl(st->expire - secs); /* do we need to build "compressed" actions for network transfer? */ - if (sc->sc_sync_ifp && compress) { + if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { switch (action) { case PFSYNC_ACT_UPD: newaction = PFSYNC_ACT_UPD_C; @@ -1010,24 +1173,20 @@ pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) struct pfsync_header *h; struct pfsync_softc *sc = ifp->if_softc; struct pfsync_state_upd_req *rup; - int s, ret; + int ret = 0; if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); + (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); - } h = mtod(sc->sc_mbuf, struct pfsync_header *); } else { h = mtod(sc->sc_mbuf, struct pfsync_header *); if (h->action != PFSYNC_ACT_UREQ) { pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); + (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); - } h = mtod(sc->sc_mbuf, struct pfsync_header *); } } @@ -1087,6 +1246,7 @@ pfsync_timeout(void *v) splx(s); } +/* This must be called in splnet() */ void pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) { @@ -1102,7 +1262,7 @@ pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) bus = sc->sc_statep.b; bus->creatorid = pf_status.hostid; bus->status = status; - bus->endtime = htonl(mono_time.tv_sec - sc->sc_ureq_received); + bus->endtime = htonl(time_uptime - sc->sc_ureq_received); pfsync_sendout(sc); } } @@ -1136,7 +1296,7 @@ pfsync_bulk_update(void *v) /* send an update and move to end of list */ if (!state->sync_flags) pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - state->pfsync_time = mono_time.tv_sec; + state->pfsync_time = time_uptime; TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); TAILQ_INSERT_TAIL(&state_updates, state, u.s.entry_updates); @@ -1154,16 +1314,28 @@ void pfsync_bulkfail(void *v) { struct pfsync_softc *sc = v; + int s, error; if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again in a bit */ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); - pfsync_request_update(NULL, NULL); - pfsync_sendout(sc); + s = splnet(); + error = pfsync_request_update(NULL, NULL); + if (error == ENOMEM) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: cannot allocate mbufs for " + "bulk update\n"); + } else + pfsync_sendout(sc); + splx(s); } else { /* Pretend like the transfer was ok */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_suppress_preempt--; +#endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive " @@ -1172,6 +1344,7 @@ pfsync_bulkfail(void *v) } } +/* This must be called in splnet() */ int pfsync_sendout(sc) struct pfsync_softc *sc; @@ -1199,9 +1372,8 @@ pfsync_sendout(sc) sc->sc_statep_net.s = NULL; } - if (sc->sc_sync_ifp) { + if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { struct ip *ip; - struct ifaddr *ifa; struct sockaddr sa; M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); @@ -1221,16 +1393,12 @@ pfsync_sendout(sc) ip->ip_sum = 0; bzero(&sa, sizeof(sa)); - sa.sa_family = AF_INET; - ifa = ifaof_ifpforaddr(&sa, sc->sc_sync_ifp); - if (ifa == NULL) - return (0); - ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ip->ip_src.s_addr = INADDR_ANY; if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) m->m_flags |= M_MCAST; ip->ip_dst = sc->sc_sendaddr; - sc->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; pfsyncstats.pfsyncs_opackets++; diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index b3705c8dd2a2..ddd049a81fda 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.13 2004/03/22 04:54:17 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.19 2005/01/20 17:47:38 mcbride Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -85,6 +85,9 @@ struct pfsync_state { u_int8_t updates; } __packed; +#define PFSYNC_FLAG_COMPRESS 0x01 +#define PFSYNC_FLAG_STALE 0x02 + struct pfsync_state_upd { u_int32_t id[2]; struct pfsync_state_peer src; @@ -150,9 +153,10 @@ struct pfsync_softc { struct timeout sc_tmo; struct timeout sc_bulk_tmo; struct timeout sc_bulkfail_tmo; + struct in_addr sc_sync_peer; struct in_addr sc_sendaddr; - struct mbuf *sc_mbuf; /* current cummulative mbuf */ - struct mbuf *sc_mbuf_net; /* current cummulative mbuf */ + struct mbuf *sc_mbuf; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ union sc_statep sc_statep; union sc_statep sc_statep_net; u_int32_t sc_ureq_received; @@ -184,7 +188,7 @@ struct pfsync_header { } __packed; #define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ -#define PFSYNC_MAX_BULKTRIES 12 +#define PFSYNC_MAX_BULKTRIES 12 #define PFSYNC_HDRLEN sizeof(struct pfsync_header) #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ @@ -194,33 +198,34 @@ struct pfsync_header { #define PFSYNC_DFLTTL 255 struct pfsyncstats { - u_long pfsyncs_ipackets; /* total input packets, IPv4 */ - u_long pfsyncs_ipackets6; /* total input packets, IPv6 */ - u_long pfsyncs_badif; /* not the right interface */ - u_long pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ - u_long pfsyncs_hdrops; /* packets shorter than header */ - u_long pfsyncs_badver; /* bad (incl unsupp) version */ - u_long pfsyncs_badact; /* bad action */ - u_long pfsyncs_badlen; /* data length does not match */ - u_long pfsyncs_badauth; /* bad authentication */ - u_long pfsyncs_badstate; /* insert/lookup failed */ + u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ + u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */ + u_int64_t pfsyncs_badif; /* not the right interface */ + u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ + u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */ + u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */ + u_int64_t pfsyncs_badact; /* bad action */ + u_int64_t pfsyncs_badlen; /* data length does not match */ + u_int64_t pfsyncs_badauth; /* bad authentication */ + u_int64_t pfsyncs_stale; /* stale state */ + u_int64_t pfsyncs_badval; /* bad values */ + u_int64_t pfsyncs_badstate; /* insert/lookup failed */ - u_long pfsyncs_opackets; /* total output packets, IPv4 */ - u_long pfsyncs_opackets6; /* total output packets, IPv6 */ - u_long pfsyncs_onomem; /* no memory for an mbuf for a send */ - u_long pfsyncs_oerrors; /* ip output error */ + u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */ + u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ + u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ + u_int64_t pfsyncs_oerrors; /* ip output error */ }; /* * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC */ struct pfsyncreq { - char pfsyncr_syncif[IFNAMSIZ]; - int pfsyncr_maxupdates; - int pfsyncr_authlevel; + char pfsyncr_syncdev[IFNAMSIZ]; + struct in_addr pfsyncr_syncpeer; + int pfsyncr_maxupdates; + int pfsyncr_authlevel; }; -#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) -#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) #define pf_state_peer_hton(s,d) do { \ @@ -267,12 +272,14 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); } while (0) #define pfsync_update_state(st) do { \ if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_UPD, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_UPD, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_delete_state(st) do { \ if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_DEL, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #endif diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index a2db3d48ebf7..dc263d198af2 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */ -/* add $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */ +/* $OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -69,6 +68,7 @@ #include #include #include +#include #include #include @@ -92,7 +92,7 @@ * Global variables */ -struct pf_anchorqueue pf_anchors; +struct pf_anchor_global pf_anchors; struct pf_ruleset pf_main_ruleset; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; @@ -107,12 +107,22 @@ u_int32_t ticket_pabuf; struct timeout pf_expire_to; /* expire timeout */ +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +} pf_anchor_stack[64]; + struct pool pf_src_tree_pl, pf_rule_pl; struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); -void pf_print_state(struct pf_state *); -void pf_print_flags(u_int8_t); + +void pf_init_threshold(struct pf_threshold *, u_int32_t, + u_int32_t); +void pf_add_threshold(struct pf_threshold *); +int pf_check_threshold(struct pf_threshold *); void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, @@ -128,7 +138,8 @@ void pf_change_icmp(struct pf_addr *, u_int16_t *, void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, - u_int8_t, u_int16_t, u_int16_t, u_int8_t); + u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, + struct ether_header *, struct ifnet *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, @@ -143,19 +154,19 @@ struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int pf_test_tcp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_udp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_icmp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_other(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, @@ -168,12 +179,12 @@ int pf_test_state_udp(struct pf_state **, int, void *, struct pf_pdesc *); int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *); + void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct pf_pdesc *); struct pf_tag *pf_get_tag(struct mbuf *); int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_rule *, struct pf_tag *, int *); + struct pf_tag **, int *); void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); int pf_map_addr(u_int8_t, struct pf_rule *, @@ -204,6 +215,8 @@ int pf_addr_wrap_neq(struct pf_addr_wrap *, static int pf_add_mbuf_tag(struct mbuf *, u_int); struct pf_state *pf_find_state_recurse(struct pfi_kif *, struct pf_state *, u_int8_t); +int pf_src_connlimit(struct pf_state **); +int pf_check_congestion(struct ifqueue *); struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { { &pf_state_pl, PFSTATE_HIWAT }, @@ -214,12 +227,12 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { #define STATE_LOOKUP() \ do { \ if (direction == PF_IN) \ - *state = pf_find_state_recurse( \ + *state = pf_find_state_recurse( \ kif, &key, PF_EXT_GWY); \ else \ - *state = pf_find_state_recurse( \ + *state = pf_find_state_recurse( \ kif, &key, PF_LAN_EXT); \ - if (*state == NULL) \ + if (*state == NULL || (*state)->timeout == PFTM_PURGE) \ return (PF_DROP); \ if (direction == PF_OUT && \ (((*state)->rule.ptr->rt == PF_ROUTETO && \ @@ -243,6 +256,24 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ (k)->pfik_parent->pfik_parent) +#define STATE_INC_COUNTERS(s) \ + do { \ + s->rule.ptr->states++; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states++; \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states++; \ + } while (0) + +#define STATE_DEC_COUNTERS(s) \ + do { \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states--; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states--; \ + s->rule.ptr->states--; \ + } while (0) + static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); static __inline int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); @@ -250,6 +281,7 @@ static __inline int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); struct pf_src_tree tree_src_tracking; @@ -263,6 +295,8 @@ RB_GENERATE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); RB_GENERATE(pf_state_tree_id, pf_state, u.s.entry_id, pf_state_compare_id); +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) @@ -459,6 +493,14 @@ pf_state_compare_id(struct pf_state *a, struct pf_state *b) return (0); } +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? -1 : 1) : 0); +} + #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -477,7 +519,7 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) break; } } -#endif +#endif /* INET6 */ struct pf_state * pf_find_state_byid(struct pf_state *key) @@ -553,6 +595,131 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) } } +void +pf_init_threshold(struct pf_threshold *threshold, + u_int32_t limit, u_int32_t seconds) +{ + threshold->limit = limit * PF_THRESHOLD_MULT; + threshold->seconds = seconds; + threshold->count = 0; + threshold->last = time_second; +} + +void +pf_add_threshold(struct pf_threshold *threshold) +{ + u_int32_t t = time_second, diff = t - threshold->last; + + if (diff >= threshold->seconds) + threshold->count = 0; + else + threshold->count -= threshold->count * diff / + threshold->seconds; + threshold->count += PF_THRESHOLD_MULT; + threshold->last = t; +} + +int +pf_check_threshold(struct pf_threshold *threshold) +{ + return (threshold->count > threshold->limit); +} + +int +pf_src_connlimit(struct pf_state **state) +{ + struct pf_state *s; + int bad = 0; + + (*state)->src_node->conn++; + pf_add_threshold(&(*state)->src_node->conn_rate); + + if ((*state)->rule.ptr->max_src_conn && + (*state)->rule.ptr->max_src_conn < + (*state)->src_node->conn) { + pf_status.lcounters[LCNT_SRCCONN]++; + bad++; + } + + if ((*state)->rule.ptr->max_src_conn_rate.limit && + pf_check_threshold(&(*state)->src_node->conn_rate)) { + pf_status.lcounters[LCNT_SRCCONNRATE]++; + bad++; + } + + if (!bad) + return (0); + + if ((*state)->rule.ptr->overload_tbl) { + struct pfr_addr p; + u_int32_t killed = 0; + + pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_src_connlimit: blocking address "); + pf_print_host(&(*state)->src_node->addr, 0, + (*state)->af); + } + + bzero(&p, sizeof(p)); + p.pfra_af = (*state)->af; + switch ((*state)->af) { +#ifdef INET + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; +#endif /* INET6 */ + } + + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, + &p, time_second); + + /* kill existing states if that's required. */ + if ((*state)->rule.ptr->flush) { + pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + + RB_FOREACH(s, pf_state_tree_id, &tree_id) { + /* + * Kill states from this source. (Only those + * from the same rule if PF_FLUSH_GLOBAL is not + * set) + */ + if (s->af == (*state)->af && + (((*state)->direction == PF_OUT && + PF_AEQ(&(*state)->src_node->addr, + &s->lan.addr, s->af)) || + ((*state)->direction == PF_IN && + PF_AEQ(&(*state)->src_node->addr, + &s->ext.addr, s->af))) && + ((*state)->rule.ptr->flush & + PF_FLUSH_GLOBAL || + (*state)->rule.ptr == s->rule.ptr)) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = + TCPS_CLOSED; + killed++; + } + } + if (pf_status.debug >= PF_DEBUG_MISC) + printf(", %u states killed", killed); + } + if (pf_status.debug >= PF_DEBUG_MISC) + printf("\n"); + } + + /* kill this state */ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + return (1); +} + int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) @@ -574,9 +741,16 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); + else + pf_status.lcounters[LCNT_SRCNODES]++; if ((*sn) == NULL) return (-1); bzero(*sn, sizeof(struct pf_src_node)); + + pf_init_threshold(&(*sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + (*sn)->af = af; if (rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR) @@ -594,7 +768,7 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, pool_put(&pf_src_tree_pl, *sn); return (-1); } - (*sn)->creation = time.tv_sec; + (*sn)->creation = time_second; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; @@ -602,8 +776,10 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, pf_status.src_nodes++; } else { if (rule->max_src_states && - (*sn)->states >= rule->max_src_states) + (*sn)->states >= rule->max_src_states) { + pf_status.lcounters[LCNT_SRCSTATES]++; return (-1); + } } return (0); } @@ -705,7 +881,7 @@ pf_state_expires(const struct pf_state *state) /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) - return (time.tv_sec); + return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); KASSERT(state->timeout < PFTM_MAX); @@ -726,7 +902,7 @@ pf_state_expires(const struct pf_state *state) return (state->expire + timeout * (end - states) / (end - start)); else - return (time.tv_sec); + return (time_second); } return (state->expire + timeout); } @@ -739,7 +915,7 @@ pf_purge_expired_src_nodes(void) for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); - if (cur->states <= 0 && cur->expire <= time.tv_sec) { + if (cur->states <= 0 && cur->expire <= time_second) { if (cur->rule.ptr != NULL) { cur->rule.ptr->src_nodes--; if (cur->rule.ptr->states <= 0 && @@ -760,12 +936,17 @@ pf_src_tree_remove_state(struct pf_state *s) u_int32_t timeout; if (s->src_node != NULL) { + if (s->proto == IPPROTO_TCP) { + if (s->src.state == PF_TCPS_PROXY_DST || + s->timeout >= PFTM_TCP_ESTABLISHED) + --s->src_node->conn; + } if (--s->src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = pf_default_rule.timeout[PFTM_SRC_NODE]; - s->src_node->expire = time.tv_sec + timeout; + s->src_node->expire = time_second + timeout; } } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { @@ -774,12 +955,50 @@ pf_src_tree_remove_state(struct pf_state *s) if (!timeout) timeout = pf_default_rule.timeout[PFTM_SRC_NODE]; - s->nat_src_node->expire = time.tv_sec + timeout; + s->nat_src_node->expire = time_second + timeout; } } s->src_node = s->nat_src_node = NULL; } +void +pf_purge_expired_state(struct pf_state *cur) +{ + if (cur->src.state == PF_TCPS_PROXY_DST) + pf_send_tcp(cur->rule.ptr, cur->af, + &cur->ext.addr, &cur->lan.addr, + cur->ext.port, cur->lan.port, + cur->src.seqhi, cur->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, NULL, NULL); + RB_REMOVE(pf_state_tree_ext_gwy, + &cur->u.s.kif->pfik_ext_gwy, cur); + RB_REMOVE(pf_state_tree_lan_ext, + &cur->u.s.kif->pfik_lan_ext, cur); + RB_REMOVE(pf_state_tree_id, &tree_id, cur); +#if NPFSYNC + pfsync_delete_state(cur); +#endif + pf_src_tree_remove_state(cur); + if (--cur->rule.ptr->states <= 0 && + cur->rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + if (cur->nat_rule.ptr != NULL) + if (--cur->nat_rule.ptr->states <= 0 && + cur->nat_rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->nat_rule.ptr); + if (cur->anchor.ptr != NULL) + if (--cur->anchor.ptr->states <= 0) + pf_rm_rule(NULL, cur->anchor.ptr); + pf_normalize_tcp_cleanup(cur); + pfi_detach_state(cur->u.s.kif); + TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); + if (cur->tag) + pf_tag_unref(cur->tag); + pool_put(&pf_state_pl, cur); + pf_status.fcounters[FCNT_STATE_REMOVALS]++; + pf_status.states--; +} + void pf_purge_expired_states(void) { @@ -788,40 +1007,8 @@ pf_purge_expired_states(void) for (cur = RB_MIN(pf_state_tree_id, &tree_id); cur; cur = next) { next = RB_NEXT(pf_state_tree_id, &tree_id, cur); - - if (pf_state_expires(cur) <= time.tv_sec) { - if (cur->src.state == PF_TCPS_PROXY_DST) - pf_send_tcp(cur->rule.ptr, cur->af, - &cur->ext.addr, &cur->lan.addr, - cur->ext.port, cur->lan.port, - cur->src.seqhi, cur->src.seqlo + 1, 0, - TH_RST|TH_ACK, 0, 0); - RB_REMOVE(pf_state_tree_ext_gwy, - &cur->u.s.kif->pfik_ext_gwy, cur); - RB_REMOVE(pf_state_tree_lan_ext, - &cur->u.s.kif->pfik_lan_ext, cur); - RB_REMOVE(pf_state_tree_id, &tree_id, cur); -#if NPFSYNC - pfsync_delete_state(cur); -#endif - pf_src_tree_remove_state(cur); - if (--cur->rule.ptr->states <= 0 && - cur->rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states <= 0 && - cur->nat_rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->nat_rule.ptr); - if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states <= 0) - pf_rm_rule(NULL, cur->anchor.ptr); - pf_normalize_tcp_cleanup(cur); - pfi_detach_state(cur->u.s.kif); - TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); - pool_put(&pf_state_pl, cur); - pf_status.fcounters[FCNT_STATE_REMOVALS]++; - pf_status.states--; - } + if (pf_state_expires(cur) <= time_second) + pf_purge_expired_state(cur); } } @@ -1011,14 +1198,14 @@ pf_calc_skip_steps(struct pf_rulequeue *rules) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); - if (cur->src.not != prev->src.not || + if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); - if (cur->dst.not != prev->dst.not || + if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || @@ -1057,21 +1244,6 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) } } -void -pf_update_anchor_rules() -{ - struct pf_rule *rule; - int i; - - for (i = 0; i < PF_RULESET_MAX; ++i) - TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, - entries) - if (rule->anchorname[0]) - rule->anchor = pf_find_anchor(rule->anchorname); - else - rule->anchor = NULL; -} - u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { @@ -1256,10 +1428,10 @@ void pf_send_tcp(const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, - u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl) + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, + struct ether_header *eh, struct ifnet *ifp) { struct mbuf *m; - struct m_tag *mtag; int len, tlen; #ifdef INET struct ip *h; @@ -1289,17 +1461,22 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, } /* create outgoing mbuf */ - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - return; m = m_gethdr(M_DONTWAIT, MT_HEADER); - if (m == NULL) { - m_tag_free(mtag); + if (m == NULL) return; + if (tag) { + struct m_tag *mtag; + + mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return; + } + m_tag_prepend(m, mtag); } - m_tag_prepend(m, mtag); #ifdef ALTQ if (r != NULL && r->qid) { + struct m_tag *mtag; struct altq_tag *atag; mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); @@ -1312,7 +1489,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, m_tag_prepend(m, mtag); } } -#endif +#endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; @@ -1377,8 +1554,28 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h->ip_off = htons(ip_mtudisc ? IP_DF : 0); h->ip_ttl = ttl ? ttl : ip_defttl; h->ip_sum = 0; - ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, - (void *)NULL); + if (eh == NULL) { + ip_output(m, (void *)NULL, (void *)NULL, 0, + (void *)NULL, (void *)NULL); + } else { + struct route ro; + struct rtentry rt; + struct ether_header *e = (void *)ro.ro_dst.sa_data; + + if (ifp == NULL) { + m_freem(m); + return; + } + rt.rt_ifp = ifp; + ro.ro_rt = &rt; + ro.ro_dst.sa_len = sizeof(ro.ro_dst); + ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; + bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); + bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); + e->ether_type = eh->ether_type; + ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, + (void *)NULL, (void *)NULL); + } break; #endif /* INET */ #ifdef INET6 @@ -1427,7 +1624,7 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, m_tag_prepend(m0, mtag); } } -#endif +#endif /* ALTQ */ switch (af) { #ifdef INET @@ -1552,17 +1749,14 @@ pf_get_tag(struct mbuf *m) } int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule, - struct pf_tag *pftag, int *tag) +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_tag **pftag, int *tag) { if (*tag == -1) { /* find mbuf tag */ - pftag = pf_get_tag(m); - if (pftag != NULL) - *tag = pftag->tag; + *pftag = pf_get_tag(m); + if (*pftag != NULL) + *tag = (*pftag)->tag; else *tag = 0; - if (nat_rule != NULL && nat_rule->tag) - *tag = nat_rule->tag; } return ((!r->match_tag_not && r->match_tag == *tag) || @@ -1589,36 +1783,66 @@ pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag) return (0); } -#define PF_STEP_INTO_ANCHOR(r, a, s, n) \ - do { \ - if ((r) == NULL || (r)->anchor == NULL || \ - (s) != NULL || (a) != NULL) \ - panic("PF_STEP_INTO_ANCHOR"); \ - (a) = (r); \ - (s) = TAILQ_FIRST(&(r)->anchor->rulesets); \ - (r) = NULL; \ - while ((s) != NULL && ((r) = \ - TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ - (s) = TAILQ_NEXT((s), entries); \ - if ((r) == NULL) { \ - (r) = TAILQ_NEXT((a), entries); \ - (a) = NULL; \ - } \ - } while (0) +static void +pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a) +{ + struct pf_anchor_stackframe *f; -#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n) \ - do { \ - if ((r) != NULL || (a) == NULL || (s) == NULL) \ - panic("PF_STEP_OUT_OF_ANCHOR"); \ - (s) = TAILQ_NEXT((s), entries); \ - while ((s) != NULL && ((r) = \ - TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ - (s) = TAILQ_NEXT((s), entries); \ - if ((r) == NULL) { \ - (r) = TAILQ_NEXT((a), entries); \ - (a) = NULL; \ - } \ - } while (0) + if (*depth >= sizeof(pf_anchor_stack) / + sizeof(pf_anchor_stack[0])) { + printf("pf_step_into_anchor: stack overflow\n"); + *r = TAILQ_NEXT(*r, entries); + return; + } else if (*depth == 0 && a != NULL) + *a = *r; + f = pf_anchor_stack + (*depth)++; + f->rs = *rs; + f->r = *r; + if ((*r)->anchor_wildcard) { + f->parent = &(*r)->anchor->children; + if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == + NULL) { + *r = NULL; + return; + } + *rs = &f->child->ruleset; + } else { + f->parent = NULL; + f->child = NULL; + *rs = &(*r)->anchor->ruleset; + } + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); +} + +static void +pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a) +{ + struct pf_anchor_stackframe *f; + + do { + if (*depth <= 0) + break; + f = pf_anchor_stack + *depth - 1; + if (f->parent != NULL && f->child != NULL) { + f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); + if (f->child != NULL) { + *rs = &f->child->ruleset; + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); + if (*r == NULL) + continue; + else + break; + } + } + (*depth)--; + if (*depth == 0 && a != NULL) + *a = NULL; + *rs = f->rs; + *r = TAILQ_NEXT(f->r, entries); + } while (*r == NULL); +} #ifdef INET6 void @@ -1772,20 +1996,27 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_NOROUTE) return (1); if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (af == AF_INET) { + switch (af) { +#ifdef INET + case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) @@ -1807,25 +2038,29 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, switch (af) { #ifdef INET case AF_INET: - rpool->counter.addr32[0] = arc4random(); + rpool->counter.addr32[0] = htonl(arc4random()); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rmask->addr32[3] != 0xffffffff) - rpool->counter.addr32[3] = arc4random(); + rpool->counter.addr32[3] = + htonl(arc4random()); else break; if (rmask->addr32[2] != 0xffffffff) - rpool->counter.addr32[2] = arc4random(); + rpool->counter.addr32[2] = + htonl(arc4random()); else break; if (rmask->addr32[1] != 0xffffffff) - rpool->counter.addr32[1] = arc4random(); + rpool->counter.addr32[1] = + htonl(arc4random()); else break; if (rmask->addr32[0] != 0xffffffff) - rpool->counter.addr32[0] = arc4random(); + rpool->counter.addr32[0] = + htonl(arc4random()); break; #endif /* INET6 */ } @@ -1886,6 +2121,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, get_addr: PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); break; } @@ -1928,7 +2165,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, * similar 2 portloop in in_pcbbind */ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { - key.gwy.port = 0; + key.gwy.port = dport; if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) return (0); } else if (low == 0 && high == 0) { @@ -1950,7 +2187,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, high = tmp; } /* low < high */ - cut = arc4random() % (1 + high - low) + low; + cut = htonl(arc4random()) % (1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { key.gwy.port = htons(tmp); @@ -1992,8 +2229,11 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, int rs_num) { - struct pf_rule *r, *rm = NULL, *anchorrule = NULL; + struct pf_rule *r, *rm = NULL; struct pf_ruleset *ruleset = NULL; + struct pf_tag *pftag = NULL; + int tag = -1; + int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); while (r && rm == NULL) { @@ -2019,7 +2259,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not)) + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->neg)) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, @@ -2027,7 +2267,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) r = TAILQ_NEXT(r, entries); @@ -2035,20 +2275,25 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, pd->hdr.tcp), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) - r = TAILQ_NEXT(r, entries); - else if (r->anchor == NULL) + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { rm = r; - else - PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num); - if (r == NULL && anchorrule != NULL) - PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset, - rs_num); + } else + pf_step_into_anchor(&asd, &ruleset, rs_num, &r, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL); } + if (pf_tag_packet(m, pftag, tag)) + return (NULL); if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) return (NULL); @@ -2100,7 +2345,9 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, switch (direction) { case PF_OUT: if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - if (pd->af == AF_INET) { + switch (pd->af) { +#ifdef INET + case AF_INET: if (r->rpool.cur->addr.p.dyn-> pfid_acnt4 < 1) return (NULL); @@ -2110,7 +2357,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->rpool.cur->addr.p.dyn-> pfid_mask4, saddr, AF_INET); - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (r->rpool.cur->addr.p.dyn-> pfid_acnt6 < 1) return (NULL); @@ -2120,6 +2370,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->rpool.cur->addr.p.dyn-> pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ } } else PF_POOLMASK(naddr, @@ -2128,8 +2380,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, saddr, pd->af); break; case PF_IN: - if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - if (pd->af == AF_INET) { + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#ifdef INET + case AF_INET: if (r->src.addr.p.dyn-> pfid_acnt4 < 1) return (NULL); @@ -2139,7 +2393,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->src.addr.p.dyn-> pfid_mask4, daddr, AF_INET); - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (r->src.addr.p.dyn-> pfid_acnt6 < 1) return (NULL); @@ -2149,6 +2406,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->src.addr.p.dyn-> pfid_mask6, daddr, AF_INET6); + break; +#endif /* INET6 */ } } else PF_POOLMASK(naddr, @@ -2159,7 +2418,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, } break; case PF_RDR: { - if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn)) + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) return (NULL); if (r->rpool.proxy_port[1]) { @@ -2224,6 +2483,7 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) daddr = pd->src; } switch (pd->af) { +#ifdef INET case AF_INET: inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); if (inp == NULL) { @@ -2232,6 +2492,7 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) return (0); } break; +#endif /* INET */ #ifdef INET6 case AF_INET6: inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, @@ -2316,6 +2577,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); + NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; @@ -2410,7 +2672,8 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) int pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; @@ -2428,6 +2691,12 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_tag *pftag = NULL; int tag = -1; u_int16_t mss = tcp_mssdflt; + int asd = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); @@ -2472,12 +2741,12 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_TCP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -2498,9 +2767,9 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], gid)) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag)) + else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) @@ -2516,12 +2785,12 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } r = *rm; a = *am; @@ -2563,7 +2832,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl); + r->return_ttl, 1, pd->eh, kif->pfik_ifp); } else if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); @@ -2590,21 +2859,29 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, len = pd->tot_len - off - (th->th_off << 2); /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; + } /* src node for flter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -2619,18 +2896,13 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = IPPROTO_TCP; @@ -2667,7 +2939,7 @@ cleanup: if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ - while ((s->src.seqdiff = arc4random()) == 0) + while ((s->src.seqdiff = htonl(arc4random())) == 0) ; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); @@ -2692,8 +2964,8 @@ cleanup: s->dst.max_win = 1; s->src.state = TCPS_SYN_SENT; s->dst.state = TCPS_CLOSED; - s->creation = time.tv_sec; - s->expire = time.tv_sec; + s->creation = time_second; + s->expire = time_second; s->timeout = PFTM_TCP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { @@ -2709,25 +2981,35 @@ cleanup: off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && - pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src, - &s->dst, &rewrite)) { + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, &rewrite)) { + /* This really shouldn't happen!!! */ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { pf_normalize_tcp_cleanup(s); - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { s->src.state = PF_TCPS_PROXY_SRC; @@ -2742,7 +3024,7 @@ cleanup: bport, 0, af); } } - s->src.seqhi = arc4random(); + s->src.seqhi = htonl(arc4random()); /* Find mss option */ mss = pf_get_mss(m, off, th->th_off, af); mss = pf_calc_mss(saddr, af, mss); @@ -2750,7 +3032,8 @@ cleanup: s->src.mss = mss; pf_send_tcp(r, af, daddr, saddr, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } @@ -2765,7 +3048,8 @@ cleanup: int pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; @@ -2782,6 +3066,12 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, int rewrite = 0; struct pf_tag *pftag = NULL; int tag = -1; + int asd = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); @@ -2826,12 +3116,12 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_UDP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], uh->uh_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], uh->uh_dport)) @@ -2850,9 +3140,9 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], gid)) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag)) + else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); @@ -2867,12 +3157,12 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } r = *rm; a = *am; @@ -2923,21 +3213,29 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; + } /* src node for flter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -2952,18 +3250,13 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = IPPROTO_UDP; @@ -2996,8 +3289,8 @@ cleanup: } s->src.state = PFUDPS_SINGLE; s->dst.state = PFUDPS_NO_TRAFFIC; - s->creation = time.tv_sec; - s->expire = time.tv_sec; + s->creation = time_second; + s->expire = time_second; s->timeout = PFTM_UDP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { @@ -3010,12 +3303,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } /* copy back packet headers if we performed NAT operations */ @@ -3028,7 +3326,8 @@ cleanup: int pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; @@ -3045,6 +3344,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, #ifdef INET6 int rewrite = 0; #endif /* INET6 */ + int asd = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } switch (pd->proto) { #ifdef INET @@ -3081,7 +3386,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, if (direction == PF_OUT) { /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) { PF_ACPY(&pd->baddr, saddr, af); switch (af) { #ifdef INET @@ -3105,7 +3410,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, } else { /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) { PF_ACPY(&pd->baddr, daddr, af); switch (af) { #ifdef INET @@ -3139,9 +3444,9 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); @@ -3151,9 +3456,9 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag)) + else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); @@ -3168,12 +3473,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } r = *rm; a = *am; @@ -3204,21 +3509,29 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; + } /* src node for flter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -3233,18 +3546,13 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = pd->proto; @@ -3271,8 +3579,8 @@ cleanup: PF_ACPY(&s->gwy.addr, &s->lan.addr, af); s->gwy.port = icmpid; } - s->creation = time.tv_sec; - s->expire = time.tv_sec; + s->creation = time_second; + s->expire = time_second; s->timeout = PFTM_ICMP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { @@ -3285,12 +3593,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } #ifdef INET6 @@ -3306,7 +3619,7 @@ cleanup: int pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, - struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_rule *r, *a = NULL; @@ -3317,6 +3630,12 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, u_short reason; struct pf_tag *pftag = NULL; int tag = -1; + int asd = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); @@ -3377,17 +3696,17 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos & pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag)) + else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); @@ -3402,12 +3721,12 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } r = *rm; a = *am; @@ -3466,21 +3785,29 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; + } /* src node for flter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -3495,18 +3822,13 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = pd->proto; @@ -3529,8 +3851,8 @@ cleanup: } s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; - s->creation = time.tv_sec; - s->expire = time.tv_sec; + s->creation = time_second; + s->expire = time_second; s->timeout = PFTM_OTHER_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { @@ -3543,12 +3865,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } return (PF_PASS); @@ -3565,6 +3892,7 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, u_short reason; struct pf_tag *pftag = NULL; int tag = -1; + int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { @@ -3578,9 +3906,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos & pd->tos)) r = TAILQ_NEXT(r, entries); @@ -3588,9 +3916,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r->flagset || r->type || r->code || r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, NULL, pftag, &tag)) + else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { @@ -3601,12 +3929,12 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a); } r = *rm; a = *am; @@ -3636,7 +3964,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); - u_int32_t ack, end, seq; + u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; int copyback = 0; @@ -3667,21 +3995,32 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } if ((*state)->src.state == PF_TCPS_PROXY_SRC) { - if (direction != (*state)->direction) + if (direction != (*state)->direction) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); + } if (th->th_flags & TH_SYN) { - if (ntohl(th->th_seq) != (*state)->src.seqlo) + if (ntohl(th->th_seq) != (*state)->src.seqlo) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); + } pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0); + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, + NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - else + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { @@ -3697,31 +4036,37 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); + } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) - (*state)->dst.seqhi = arc4random(); + (*state)->dst.seqhi = htonl(arc4random()); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0); + (*state)->src.mss, 0, 0, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || - (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - else { + } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, - TH_ACK, (*state)->src.max_win, 0, 0); + TH_ACK, (*state)->src.max_win, 0, 0, 0, + NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0); + TH_ACK, (*state)->dst.max_win, 0, 0, 1, + NULL, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - @@ -3733,6 +4078,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } @@ -3749,7 +4095,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, * tcp_filtering.ps */ - seq = ntohl(th->th_seq); + orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { /* First packet from this end. Set its state */ @@ -3763,7 +4109,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { - while ((src->seqdiff = arc4random()) == 0) + while ((src->seqdiff = htonl(arc4random())) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + @@ -3859,8 +4205,17 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ - (ackskew <= (MAXACKWINDOW << sws))) { + (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, ©back)) + return (PF_DROP); + } /* update max window */ if (src->max_win < win) @@ -3881,16 +4236,22 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { - if (dst->state == TCPS_SYN_SENT) + if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; - else if (dst->state == TCPS_CLOSING) + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) dst->state = TCPS_FIN_WAIT_2; } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ - (*state)->expire = time.tv_sec; + (*state)->expire = time_second; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; @@ -3946,6 +4307,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->packets[0], (*state)->packets[1]); } + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, ©back)) + return (PF_DROP); + } + /* update max window */ if (src->max_win < win) src->max_win = win; @@ -3973,19 +4340,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ - if (!(th->th_flags & TH_RST)) { - u_int32_t ack = ntohl(th->th_seq) + pd->p_len; - - if (th->th_flags & TH_SYN) - ack++; - if (th->th_flags & TH_FIN) - ack++; + if (!(th->th_flags & TH_RST)) pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, - th->th_sport, ntohl(th->th_ack), ack, - TH_RST|TH_ACK, 0, 0, - (*state)->rule.ptr->return_ttl); - } + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, + pd->eh, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; @@ -4007,15 +4368,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); } + REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } - if (dst->scrub || src->scrub) { - if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - src, dst, ©back)) - return (PF_DROP); - } - /* Any packets which have gotten here are to be passed */ /* translate source/destination address, if necessary */ @@ -4076,7 +4432,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFUDPS_MULTIPLE; /* update expire time */ - (*state)->expire = time.tv_sec; + (*state)->expire = time_second; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else @@ -4100,7 +4456,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, int off, void *h, struct pf_pdesc *pd) + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid, *icmpsum; @@ -4161,7 +4517,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, STATE_LOOKUP(); - (*state)->expire = time.tv_sec; + (*state)->expire = time_second; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ @@ -4236,7 +4592,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); @@ -4246,8 +4602,10 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP error messages don't refer to non-first * fragments */ - if (h2.ip_off & htons(IP_OFFMASK)) + if (h2.ip_off & htons(IP_OFFMASK)) { + REASON_SET(reason, PFRES_FRAG); return (PF_DROP); + } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); @@ -4263,7 +4621,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); @@ -4281,6 +4639,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMPv6 error messages for * non-first fragments */ + REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: @@ -4290,7 +4649,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, - sizeof(opt6), NULL, NULL, pd2.af)) { + sizeof(opt6), NULL, reason, + pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); @@ -4326,7 +4686,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ - if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) { + if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, + pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); @@ -4383,6 +4744,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_print_state(*state); printf(" seq=%u\n", seq); } + REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } @@ -4434,7 +4796,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state key; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); @@ -4501,7 +4863,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); @@ -4553,7 +4915,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, - sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) { + sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); @@ -4699,7 +5061,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFOTHERS_MULTIPLE; /* update expire time */ - (*state)->expire = time.tv_sec; + (*state)->expire = time_second; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else @@ -4797,18 +5159,80 @@ int pf_routable(struct pf_addr *addr, sa_family_t af) { struct sockaddr_in *dst; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else struct route ro; +#endif + + bzero(&ro, sizeof(ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + rtalloc_noclone((struct route *)&ro, NO_CLONING); + + if (ro.ro_rt != NULL) { + RTFREE(ro.ro_rt); + return (1); + } + + return (0); +} + +int +pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) +{ + struct sockaddr_in *dst; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif int ret = 0; bzero(&ro, sizeof(ro)); - dst = satosin(&ro.ro_dst); - dst->sin_family = af; - dst->sin_len = sizeof(*dst); - dst->sin_addr = addr->v4; - rtalloc_noclone(&ro, NO_CLONING); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + rtalloc_noclone((struct route *)&ro, NO_CLONING); if (ro.ro_rt != NULL) { - ret = 1; + if (ro.ro_rt->rt_labelid == aw->v.rtlabel) + ret = 1; RTFREE(ro.ro_rt); } @@ -4856,17 +5280,18 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (r->rt == PF_DUPTO) { if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; - if ((mtag = m_tag_copy(mtag)) == NULL) - goto bad; - m_tag_prepend(m0, mtag); } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; m0 = *m; } - if (m0->m_len < sizeof(struct ip)) - panic("pf_route: m0->m_len < sizeof(struct ip)"); + if (m0->m_len < sizeof(struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof(struct ip)\n")); + goto bad; + } + ip = mtod(m0, struct ip *); ro = &iproute; @@ -4889,8 +5314,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = satosin(ro->ro_rt->rt_gateway); } else { - if (TAILQ_EMPTY(&r->rpool.list)) - panic("pf_route: TAILQ_EMPTY(&r->rpool.list)"); + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); @@ -4909,12 +5337,15 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto bad; if (oifp != ifp) { - if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; - if (m0->m_len < sizeof(struct ip)) - panic("pf_route: m0->m_len < sizeof(struct ip)"); + if (m0->m_len < sizeof(struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof(struct ip)\n")); + goto bad; + } ip = mtod(m0, struct ip *); } @@ -5053,17 +5484,17 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (r->rt == PF_DUPTO) { if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; - if ((mtag = m_tag_copy(mtag)) == NULL) - goto bad; - m_tag_prepend(m0, mtag); } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; m0 = *m; } - if (m0->m_len < sizeof(struct ip6_hdr)) - panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + goto bad; + } ip6 = mtod(m0, struct ip6_hdr *); ro = &ip6route; @@ -5083,8 +5514,11 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, return; } - if (TAILQ_EMPTY(&r->rpool.list)) - panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)"); + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); @@ -5102,12 +5536,15 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto bad; if (oifp != ifp) { - if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) + if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; - if (m0->m_len < sizeof(struct ip6_hdr)) - panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + goto bad; + } ip6 = mtod(m0, struct ip6_hdr *); } @@ -5178,7 +5615,8 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, return (1); if (m->m_pkthdr.len < off + len) return (1); - switch (af) { + switch (af) { +#ifdef INET case AF_INET: if (p == IPPROTO_ICMP) { if (m->m_len < off) @@ -5194,6 +5632,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sum = in4_cksum(m, p, off, len); } break; +#endif /* INET */ #ifdef INET6 case AF_INET6: if (m->m_len < sizeof(struct ip6_hdr)) @@ -5244,7 +5683,8 @@ pf_add_mbuf_tag(struct mbuf *m, u_int tag) #ifdef INET int -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; @@ -5260,14 +5700,22 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) return (PF_PASS); + if (ifp->if_type == IFT_CARP && ifp->if_carpdev) + ifp = ifp->if_carpdev; + kif = pfi_index2kif[ifp->if_index]; - if (kif == NULL) + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test"); -#endif +#endif /* DIAGNOSTIC */ memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { @@ -5278,7 +5726,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) { + if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -5301,6 +5749,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); + pd.eh = eh; /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { @@ -5336,13 +5785,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_tcp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ipintrq); break; } @@ -5370,13 +5819,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ipintrq); break; } @@ -5394,17 +5843,18 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) action = PF_DROP; goto done; } - action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd); + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, + &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ipintrq); break; } @@ -5413,13 +5863,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset); + &pd, &a, &ruleset, &ipintrq); break; } @@ -5427,12 +5877,15 @@ done: if (action == PF_PASS && h->ip_hl > 5 && !((s && s->allow_opts) || r->allow_opts)) { action = PF_DROP; - REASON_SET(&reason, PFRES_SHORT); + REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } + if (s && s->tag) + pf_tag_packet(m, pf_get_tag(m), s->tag); + #ifdef ALTQ if (action == PF_PASS && r->qid) { struct m_tag *mtag; @@ -5451,7 +5904,7 @@ done: m_tag_prepend(m, mtag); } } -#endif +#endif /* ALTQ */ /* * connections redirected to loopback should not match sockets @@ -5526,12 +5979,12 @@ done: pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.not); + tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.not); + tr->dst.neg); } @@ -5549,7 +6002,8 @@ done: #ifdef INET6 int -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; @@ -5565,14 +6019,22 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) return (PF_PASS); + if (ifp->if_type == IFT_CARP && ifp->if_carpdev) + ifp = ifp->if_carpdev; + kif = pfi_index2kif[ifp->if_index]; - if (kif == NULL) + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test"); -#endif + panic("non-M_PKTHDR is passed to pf_test6"); +#endif /* DIAGNOSTIC */ memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { @@ -5583,7 +6045,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) { + if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -5597,6 +6059,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + pd.eh = eh; off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; @@ -5616,11 +6079,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), - NULL, NULL, pd.af)) { + NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; - REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } @@ -5650,8 +6112,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) goto done; } if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) { + ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_TCP, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); @@ -5663,13 +6127,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_tcp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } @@ -5683,8 +6147,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) goto done; } if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, - off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) { + off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_UDP, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || @@ -5697,13 +6163,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } @@ -5717,40 +6183,48 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) goto done; } if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) { + ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_ICMPV6, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, - m, off, h, &pd); + m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } default: action = pf_test_state_other(&s, dir, kif, &pd); if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset); + &pd, &a, &ruleset, &ip6intrq); break; } done: /* XXX handle IPv6 options, if not allowed. not implemented. */ + if (s && s->tag) + pf_tag_packet(m, pf_get_tag(m), s->tag); + #ifdef ALTQ if (action == PF_PASS && r->qid) { struct m_tag *mtag; @@ -5769,7 +6243,7 @@ done: m_tag_prepend(m, mtag); } } -#endif +#endif /* ALTQ */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && @@ -5839,12 +6313,12 @@ done: pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.not); + tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.not); + tr->dst.neg); } @@ -5859,3 +6333,12 @@ done: return (action); } #endif /* INET6 */ + +int +pf_check_congestion(struct ifqueue *ifq) +{ + if (ifq->ifq_congestion) + return (1); + else + return (0); +} diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index a3d5f4175f01..26ce0ee731e6 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pf_if.c,v 1.11 2004/03/15 11:38:23 cedric Exp $ */ -/* add $OpenBSD: pf_if.c,v 1.19 2004/08/11 12:06:44 henning Exp $ */ +/* $OpenBSD: pf_if.c,v 1.23 2004/12/22 17:17:55 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -43,7 +42,6 @@ #include #include -#include #include #include @@ -77,10 +75,6 @@ long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; -char pfi_reserved_anchor[PF_ANCHOR_NAME_SIZE] = - PF_RESERVED_ANCHOR; -char pfi_interface_ruleset[PF_RULESET_NAME_SIZE] = - PF_INTERFACE_RULESET; void pfi_dynaddr_update(void *); void pfi_kifaddr_update(void *); @@ -91,7 +85,6 @@ void pfi_address_add(struct sockaddr *, int, int); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); void pfi_copy_group(char *, const char *, int); -void pfi_dynamic_drivers(void); void pfi_newgroup(const char *, int); int pfi_skip_if(const char *, struct pfi_kif *, int); int pfi_unmask(void *); @@ -100,7 +93,6 @@ void pfi_dohooks(struct pfi_kif *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); -#define PFI_DYNAMIC_BUSES { "pcmcia", "cardbus", "uhub" } #define PFI_BUFFER_MAX 0x10000 #define PFI_MTYPE M_IFADDR @@ -117,7 +109,6 @@ pfi_initialize(void) pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); - pfi_dynamic_drivers(); } void @@ -230,7 +221,12 @@ pfi_lookup_create(const char *name) if (p == NULL) { pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); q = pfi_lookup_if(key.pfik_name); - if (q != NULL) + if (q == NULL) { + pfi_newgroup(key.pfik_name, PFI_IFLAG_DYNAMIC); + q = pfi_lookup_if(key.pfik_name); + } + p = pfi_lookup_if(name); + if (p == NULL && q != NULL) p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); } splx(s); @@ -316,8 +312,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); - ruleset = pf_find_or_create_ruleset(pfi_reserved_anchor, - pfi_interface_ruleset); + ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR); if (ruleset == NULL) senderr(1); @@ -354,11 +349,14 @@ void pfi_dynaddr_update(void *p) { struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; - struct pfi_kif *kif = dyn->pfid_kif; - struct pfr_ktable *kt = dyn->pfid_kt; + struct pfi_kif *kif; + struct pfr_ktable *kt; - if (dyn == NULL || kif == NULL || kt == NULL) + if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) panic("pfi_dynaddr_update"); + + kif = dyn->pfid_kif; + kt = dyn->pfid_kt; if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); @@ -412,8 +410,6 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) af = ia->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; - if (!(ia->ifa_flags & IFA_ROUTE)) - continue; if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && @@ -434,14 +430,14 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) } if (af == AF_INET) got4 = 1; - else + else if (af == AF_INET6) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { if (af == AF_INET) { net2 = pfi_unmask(&((struct sockaddr_in *) ia->ifa_netmask)->sin_addr); - } else { + } else if (af == AF_INET6) { net2 = pfi_unmask(&((struct sockaddr_in6 *) ia->ifa_netmask)->sin6_addr); } @@ -574,7 +570,7 @@ pfi_if_create(const char *name, struct pfi_kif *q, int flags) RB_INIT(&p->pfik_ext_gwy); p->pfik_flags = flags; p->pfik_parent = q; - p->pfik_tzero = time.tv_sec; + p->pfik_tzero = time_second; RB_INSERT(pfi_ifhead, &pfi_ifs, p); if (q != NULL) { @@ -628,46 +624,6 @@ pfi_copy_group(char *p, const char *q, int m) *p++ = '\0'; } -void -pfi_dynamic_drivers(void) -{ - char *buses[] = PFI_DYNAMIC_BUSES; - int nbuses = sizeof(buses)/sizeof(buses[0]); - int enabled[sizeof(buses)/sizeof(buses[0])]; - struct device *dev; - struct cfdata *cf; - struct cfdriver *drv; - short *p; - int i; - - bzero(enabled, sizeof(enabled)); - TAILQ_FOREACH(dev, &alldevs, dv_list) { - if (!(dev->dv_flags & DVF_ACTIVE)) - continue; - for (i = 0; i < nbuses; i++) - if (!enabled[i] && !strcmp(buses[i], - dev->dv_cfdata->cf_driver->cd_name)) - enabled[i] = 1; - } - for (cf = cfdata; cf->cf_driver; cf++) { - if (cf->cf_driver->cd_class != DV_IFNET) - continue; - for (p = cf->cf_parents; p && *p >= 0; p++) { - if ((drv = cfdata[*p].cf_driver) == NULL) - continue; - for (i = 0; i < nbuses; i++) - if (enabled[i] && - !strcmp(drv->cd_name, buses[i])) - break; - if (i < nbuses) { - pfi_newgroup(cf->cf_driver->cd_name, - PFI_IFLAG_DYNAMIC); - break; - } - } - } -} - void pfi_newgroup(const char *name, int flags) { @@ -714,10 +670,10 @@ pfi_clr_istats(const char *name, int *nzero, int flags) { struct pfi_kif *p; int n = 0, s; - long tzero = time.tv_sec; + long tzero = time_second; - s = splsoftnet(); ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); + s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { if (pfi_skip_if(name, p, flags)) continue; @@ -732,6 +688,44 @@ pfi_clr_istats(const char *name, int *nzero, int flags) return (0); } +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + if (flags & ~PFI_IFLAG_SETABLE_MASK) + return (EINVAL); + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) + continue; + p->pfik_flags |= flags; + } + splx(s); + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + if (flags & ~PFI_IFLAG_SETABLE_MASK) + return (EINVAL); + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) + continue; + p->pfik_flags &= ~flags; + } + splx(s); + return (0); +} + int pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) { @@ -745,7 +739,7 @@ pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) continue; if (*size > n++) { if (!p->pfik_tzero) - p->pfik_tzero = boottime.tv_sec; + p->pfik_tzero = time_second; if (copyout(p, buf++, sizeof(*buf))) { splx(s); return (EFAULT); @@ -820,7 +814,9 @@ pfi_dohooks(struct pfi_kif *p) int pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { - if (af == AF_INET) { + switch (af) { +#ifdef INET + case AF_INET: switch (dyn->pfid_acnt4) { case 0: return (0); @@ -830,7 +826,10 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); } - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: switch (dyn->pfid_acnt6) { case 0: return (0); @@ -840,5 +839,9 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); } + break; +#endif /* INET6 */ + default: + return (0); } } diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index f6b7ee967dd9..f73c67b852fc 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.112.2.2 2004/07/24 18:28:12 brad Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.139 2005/03/03 07:13:39 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -80,10 +80,16 @@ void pfattach(int); int pfopen(dev_t, int, int, struct proc *); int pfclose(dev_t, int, int, struct proc *); -struct pf_pool *pf_get_pool(char *, char *, u_int32_t, - u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); +struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, + u_int8_t, u_int8_t, u_int8_t); int pf_get_ruleset_number(u_int8_t); void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + void pf_mv_pool(struct pf_palist *, struct pf_palist *); void pf_empty_pool(struct pf_palist *); int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); @@ -91,14 +97,19 @@ int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); int pf_begin_altq(u_int32_t *); int pf_rollback_altq(u_int32_t); int pf_commit_altq(u_int32_t); +int pf_enable_altq(struct pf_altq *); +int pf_disable_altq(struct pf_altq *); #endif /* ALTQ */ -int pf_begin_rules(u_int32_t *, int, char *, char *); -int pf_rollback_rules(u_int32_t, int, char *, char *); -int pf_commit_rules(u_int32_t, int, char *, char *); +int pf_begin_rules(u_int32_t *, int, const char *); +int pf_rollback_rules(u_int32_t, int, char *); +int pf_commit_rules(u_int32_t, int, char *); extern struct timeout pf_expire_to; struct pf_rule pf_default_rule; +#ifdef ALTQ +static int pf_altq_running; +#endif #define TAGID_MAX 50000 TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), @@ -110,6 +121,9 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), static u_int16_t tagname2tag(struct pf_tags *, char *); static void tag2tagname(struct pf_tags *, u_int16_t, char *); static void tag_unref(struct pf_tags *, u_int16_t); +int pf_rtlabel_add(struct pf_addr_wrap *); +void pf_rtlabel_remove(struct pf_addr_wrap *); +void pf_rtlabel_copyout(struct pf_addr_wrap *); #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x @@ -125,9 +139,9 @@ pfattach(int num) pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", NULL); pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", - NULL); + &pool_allocator_nointr); pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, - "pfpooladdrpl", NULL); + "pfpooladdrpl", &pool_allocator_nointr); pfr_initialize(); pfi_initialize(); pf_osfp_initialize(); @@ -136,7 +150,7 @@ pfattach(int num) pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); RB_INIT(&tree_src_tracking); - TAILQ_INIT(&pf_anchors); + RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); TAILQ_INIT(&pf_altqs[0]); TAILQ_INIT(&pf_altqs[1]); @@ -151,23 +165,24 @@ pfattach(int num) pf_default_rule.nr = -1; /* initialize default timeouts */ - timeout[PFTM_TCP_FIRST_PACKET] = 120; /* First TCP packet */ - timeout[PFTM_TCP_OPENING] = 30; /* No response yet */ - timeout[PFTM_TCP_ESTABLISHED] = 24*60*60; /* Established */ - timeout[PFTM_TCP_CLOSING] = 15 * 60; /* Half closed */ - timeout[PFTM_TCP_FIN_WAIT] = 45; /* Got both FINs */ - timeout[PFTM_TCP_CLOSED] = 90; /* Got a RST */ - timeout[PFTM_UDP_FIRST_PACKET] = 60; /* First UDP packet */ - timeout[PFTM_UDP_SINGLE] = 30; /* Unidirectional */ - timeout[PFTM_UDP_MULTIPLE] = 60; /* Bidirectional */ - timeout[PFTM_ICMP_FIRST_PACKET] = 20; /* First ICMP packet */ - timeout[PFTM_ICMP_ERROR_REPLY] = 10; /* Got error response */ - timeout[PFTM_OTHER_FIRST_PACKET] = 60; /* First packet */ - timeout[PFTM_OTHER_SINGLE] = 30; /* Unidirectional */ - timeout[PFTM_OTHER_MULTIPLE] = 60; /* Bidirectional */ - timeout[PFTM_FRAG] = 30; /* Fragment expire */ - timeout[PFTM_INTERVAL] = 10; /* Expire interval */ - timeout[PFTM_SRC_NODE] = 0; /* Source tracking */ + timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; + timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; + timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; + timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; + timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; + timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; + timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; + timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; + timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; + timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; + timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; + timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; + timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; + timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; + timeout[PFTM_FRAG] = PFTM_FRAG_VAL; + timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; + timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; + timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; timeout_set(&pf_expire_to, pf_purge_timeout, &pf_expire_to); timeout_add(&pf_expire_to, timeout[PFTM_INTERVAL] * hz); @@ -197,15 +212,15 @@ pfclose(dev_t dev, int flags, int fmt, struct proc *p) } struct pf_pool * -pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, - u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, - u_int8_t active, u_int8_t check_ticket) +pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, + u_int32_t rule_number, u_int8_t r_last, u_int8_t active, + u_int8_t check_ticket) { struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num; - ruleset = pf_find_ruleset(anchorname, rulesetname); + ruleset = pf_find_ruleset(anchor); if (ruleset == NULL) return (NULL); rs_num = pf_get_ruleset_number(rule_action); @@ -245,6 +260,7 @@ pf_get_ruleset_number(u_int8_t action) { switch (action) { case PF_SCRUB: + case PF_NOSCRUB: return (PF_RULESET_SCRUB); break; case PF_PASS: @@ -284,120 +300,247 @@ pf_init_ruleset(struct pf_ruleset *ruleset) } struct pf_anchor * -pf_find_anchor(const char *anchorname) +pf_find_anchor(const char *path) { - struct pf_anchor *anchor; - int n = -1; + static struct pf_anchor key; - anchor = TAILQ_FIRST(&pf_anchors); - while (anchor != NULL && (n = strcmp(anchor->name, anchorname)) < 0) - anchor = TAILQ_NEXT(anchor, entries); - if (n == 0) - return (anchor); - else - return (NULL); + memset(&key, 0, sizeof(key)); + strlcpy(key.path, path, sizeof(key.path)); + return (RB_FIND(pf_anchor_global, &pf_anchors, &key)); } struct pf_ruleset * -pf_find_ruleset(char *anchorname, char *rulesetname) +pf_find_ruleset(const char *path) { struct pf_anchor *anchor; - struct pf_ruleset *ruleset; - if (!anchorname[0] && !rulesetname[0]) + while (*path == '/') + path++; + if (!*path) return (&pf_main_ruleset); - if (!anchorname[0] || !rulesetname[0]) - return (NULL); - anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; - rulesetname[PF_RULESET_NAME_SIZE-1] = 0; - anchor = pf_find_anchor(anchorname); + anchor = pf_find_anchor(path); if (anchor == NULL) return (NULL); - ruleset = TAILQ_FIRST(&anchor->rulesets); - while (ruleset != NULL && strcmp(ruleset->name, rulesetname) < 0) - ruleset = TAILQ_NEXT(ruleset, entries); - if (ruleset != NULL && !strcmp(ruleset->name, rulesetname)) - return (ruleset); else - return (NULL); + return (&anchor->ruleset); } struct pf_ruleset * -pf_find_or_create_ruleset(char anchorname[PF_ANCHOR_NAME_SIZE], - char rulesetname[PF_RULESET_NAME_SIZE]) +pf_find_or_create_ruleset(const char *path) { - struct pf_anchor *anchor, *a; - struct pf_ruleset *ruleset, *r; + static char p[MAXPATHLEN]; + char *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor, *dup, *parent = NULL; - if (!anchorname[0] && !rulesetname[0]) - return (&pf_main_ruleset); - if (!anchorname[0] || !rulesetname[0]) + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + strlcpy(p, path, sizeof(p)); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, sizeof(p)); + if (!*q) return (NULL); - anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; - rulesetname[PF_RULESET_NAME_SIZE-1] = 0; - a = TAILQ_FIRST(&pf_anchors); - while (a != NULL && strcmp(a->name, anchorname) < 0) - a = TAILQ_NEXT(a, entries); - if (a != NULL && !strcmp(a->name, anchorname)) - anchor = a; - else { - anchor = (struct pf_anchor *)malloc(sizeof(struct pf_anchor), - M_TEMP, M_NOWAIT); + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) + return (NULL); + anchor = (struct pf_anchor *)malloc(sizeof(*anchor), M_TEMP, + M_NOWAIT); if (anchor == NULL) return (NULL); - memset(anchor, 0, sizeof(struct pf_anchor)); - bcopy(anchorname, anchor->name, sizeof(anchor->name)); - TAILQ_INIT(&anchor->rulesets); - if (a != NULL) - TAILQ_INSERT_BEFORE(a, anchor, entries); - else - TAILQ_INSERT_TAIL(&pf_anchors, anchor, entries); - } - r = TAILQ_FIRST(&anchor->rulesets); - while (r != NULL && strcmp(r->name, rulesetname) < 0) - r = TAILQ_NEXT(r, entries); - if (r != NULL && !strcmp(r->name, rulesetname)) - return (r); - ruleset = (struct pf_ruleset *)malloc(sizeof(struct pf_ruleset), - M_TEMP, M_NOWAIT); - if (ruleset != NULL) { - pf_init_ruleset(ruleset); - bcopy(rulesetname, ruleset->name, sizeof(ruleset->name)); - ruleset->anchor = anchor; + memset(anchor, 0, sizeof(*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + free(anchor, M_TEMP); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + free(anchor, M_TEMP); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; if (r != NULL) - TAILQ_INSERT_BEFORE(r, ruleset, entries); + q = r + 1; else - TAILQ_INSERT_TAIL(&anchor->rulesets, ruleset, entries); + *q = 0; } - return (ruleset); + return (&anchor->ruleset); } void pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) { - struct pf_anchor *anchor; + struct pf_anchor *parent; int i; - if (ruleset == NULL || ruleset->anchor == NULL || ruleset->tables > 0 || - ruleset->topen) - return; - for (i = 0; i < PF_RULESET_MAX; ++i) - if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || - !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || - ruleset->rules[i].inactive.open) + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) return; - - anchor = ruleset->anchor; - TAILQ_REMOVE(&anchor->rulesets, ruleset, entries); - free(ruleset, M_TEMP); - - if (TAILQ_EMPTY(&anchor->rulesets)) { - TAILQ_REMOVE(&pf_anchors, anchor, entries); - free(anchor, M_TEMP); - pf_update_anchor_rules(); + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + free(ruleset->anchor, M_TEMP); + if (parent == NULL) + return; + ruleset = &parent->ruleset; } } +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + static char *p, path[MAXPATHLEN]; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + if (name[0] == '/') + strlcpy(path, name + 1, sizeof(path)); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, sizeof(path)); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. beyond root\n"); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", sizeof(path)); + strlcat(path, name, sizeof(path)); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char a[MAXPATHLEN], b[MAXPATHLEN], *p; + int i; + + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, sizeof(a)); + strlcpy(b, r->anchor->path, sizeof(b)); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, b, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, b); + return (1); + } + if (strlen(b) > strlen(a)) + strlcat(pr->anchor_call, b + (a[0] ? strlen(a) + 1 : 0), + sizeof(pr->anchor_call)); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} + void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { @@ -435,6 +578,8 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) */ pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); } TAILQ_REMOVE(rulequeue, rule, entries); rule->entries.tqe_prev = NULL; @@ -451,13 +596,18 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif + pf_rtlabel_remove(&rule->src.addr); + pf_rtlabel_remove(&rule->dst.addr); pfi_dynaddr_remove(&rule->src.addr); pfi_dynaddr_remove(&rule->dst.addr); if (rulequeue == NULL) { pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); } pfi_detach_rule(rule->kif); + pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); pool_put(&pf_rule_pl, rule); } @@ -551,12 +701,55 @@ pf_tag2tagname(u_int16_t tagid, char *p) return (tag2tagname(&pf_tags, tagid, p)); } +void +pf_tag_ref(u_int16_t tag) +{ + struct pf_tagname *t; + + TAILQ_FOREACH(t, &pf_tags, entries) + if (t->tag == tag) + break; + if (t != NULL) + t->ref++; +} + void pf_tag_unref(u_int16_t tag) { return (tag_unref(&pf_tags, tag)); } +int +pf_rtlabel_add(struct pf_addr_wrap *a) +{ + if (a->type == PF_ADDR_RTLABEL && + (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0) + return (-1); + return (0); +} + +void +pf_rtlabel_remove(struct pf_addr_wrap *a) +{ + if (a->type == PF_ADDR_RTLABEL) + rtlabel_unref(a->v.rtlabel); +} + +void +pf_rtlabel_copyout(struct pf_addr_wrap *a) +{ + const char *name; + + if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) { + if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL) + strlcpy(a->v.rtlabelname, "?", + sizeof(a->v.rtlabelname)); + else + strlcpy(a->v.rtlabelname, name, + sizeof(a->v.rtlabelname)); + } +} + #ifdef ALTQ u_int32_t pf_qname2qid(char *qname) @@ -643,7 +836,9 @@ pf_commit_altq(u_int32_t ticket) if (altq->qname[0] == 0) { /* attach the discipline */ error = altq_pfattach(altq); - if (error) { + if (error == 0 && pf_altq_running) + error = pf_enable_altq(altq); + if (error != 0) { splx(s); return (error); } @@ -655,6 +850,8 @@ pf_commit_altq(u_int32_t ticket) TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { /* detach and destroy the discipline */ + if (pf_altq_running) + error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; @@ -670,17 +867,72 @@ pf_commit_altq(u_int32_t ticket) altqs_inactive_open = 0; return (error); } + +int +pf_enable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int s, error = 0; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + + /* set tokenbucket regulator */ + if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + s = splimp(); + error = tbr_set(&ifp->if_snd, &tb); + splx(s); + } + + return (error); +} + +int +pf_disable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int s, error; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + /* + * when the discipline is no longer referenced, it was overridden + * by a new one. if so, just return. + */ + if (altq->altq_disc != ifp->if_snd.altq_disc) + return (0); + + error = altq_disable(&ifp->if_snd); + + if (error == 0) { + /* clear tokenbucket regulator */ + tb.rate = 0; + s = splimp(); + error = tbr_set(&ifp->if_snd, &tb); + splx(s); + } + + return (error); +} #endif /* ALTQ */ int -pf_begin_rules(u_int32_t *ticket, int rs_num, char *anchor, char *ruleset) +pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_or_create_ruleset(anchor, ruleset); + rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) @@ -691,14 +943,14 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, char *anchor, char *ruleset) } int -pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_ruleset(anchor, ruleset); + rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); @@ -709,7 +961,7 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) } int -pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; @@ -718,7 +970,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_ruleset(anchor, ruleset); + rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); @@ -738,7 +990,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) pf_rm_rule(old_rules, rule); rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); - pf_update_anchor_rules(); splx(s); return (0); } @@ -771,8 +1022,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: - case DIOCGETANCHORS: - case DIOCGETANCHOR: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: @@ -791,6 +1040,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCCLRSRCNODES: case DIOCIGETIFACES: case DIOCICLRISTATS: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: @@ -818,8 +1069,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: - case DIOCGETANCHORS: - case DIOCGETANCHOR: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: @@ -848,6 +1097,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } + s = splsoftnet(); switch (cmd) { case DIOCSTART: @@ -855,9 +1105,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EEXIST; else { pf_status.running = 1; - pf_status.since = time.tv_sec; + pf_status.since = time_second; if (pf_status.stateid == 0) { - pf_status.stateid = time.tv_sec; + pf_status.stateid = time_second; pf_status.stateid = pf_status.stateid << 32; } DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); @@ -869,19 +1119,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENOENT; else { pf_status.running = 0; - pf_status.since = time.tv_sec; + pf_status.since = time_second; DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; - case DIOCBEGINRULES: { - struct pfioc_rule *pr = (struct pfioc_rule *)addr; - - error = pf_begin_rules(&pr->ticket, pf_get_ruleset_number( - pr->rule.action), pr->anchor, pr->ruleset); - break; - } - case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; @@ -889,7 +1131,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_pooladdr *pa; int rs_num; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -899,10 +1142,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - if (pr->rule.anchorname[0] && ruleset != &pf_main_ruleset) { - error = EINVAL; - break; - } if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; @@ -979,6 +1218,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; + if (pf_rtlabel_add(&rule->src.addr) || + pf_rtlabel_add(&rule->dst.addr)) + error = EBUSY; if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) error = EINVAL; if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) @@ -987,13 +1229,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) error = EINVAL; + if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) + error = EINVAL; TAILQ_FOREACH(pa, &pf_pabuf, entries) if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; + if (rule->overload_tblname[0]) { + if ((rule->overload_tbl = pfr_attach_table(ruleset, + rule->overload_tblname)) == NULL) + error = EINVAL; + else + rule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + pf_mv_pool(&pf_pabuf, &rule->rpool.list); if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || - (rule->action == PF_BINAT)) && !rule->anchorname[0]) || + (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; @@ -1009,21 +1262,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - case DIOCCOMMITRULES: { - struct pfioc_rule *pr = (struct pfioc_rule *)addr; - - error = pf_commit_rules(pr->ticket, pf_get_ruleset_number( - pr->rule.action), pr->anchor, pr->ruleset); - break; - } - case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *tail; int rs_num; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1033,7 +1279,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - s = splsoftnet(); tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); if (tail) @@ -1041,7 +1286,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; - splx(s); break; } @@ -1051,7 +1295,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_rule *rule; int rs_num, i; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1065,27 +1310,30 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - s = splsoftnet(); rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { error = EBUSY; - splx(s); break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + if (pf_anchor_copyout(ruleset, rule, pr)) { + error = EBUSY; + break; + } pfi_dynaddr_copyout(&pr->rule.src.addr); pfi_dynaddr_copyout(&pr->rule.dst.addr); pf_tbladdr_copyout(&pr->rule.src.addr); pf_tbladdr_copyout(&pr->rule.dst.addr); + pf_rtlabel_copyout(&pr->rule.src.addr); + pf_rtlabel_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; - splx(s); break; } @@ -1108,7 +1356,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - ruleset = pf_find_ruleset(pcr->anchor, pcr->ruleset); + ruleset = pf_find_ruleset(pcr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1182,7 +1430,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } else newrule->pqid = newrule->qid; } -#endif +#endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) @@ -1191,9 +1439,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; - if (newrule->rt && !newrule->direction) error = EINVAL; + if (pf_rtlabel_add(&newrule->src.addr) || + pf_rtlabel_add(&newrule->dst.addr)) + error = EBUSY; if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) error = EINVAL; if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) @@ -1202,13 +1452,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) error = EINVAL; + if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) + error = EINVAL; + + if (newrule->overload_tblname[0]) { + if ((newrule->overload_tbl = pfr_attach_table( + ruleset, newrule->overload_tblname)) == + NULL) + error = EINVAL; + else + newrule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } pf_mv_pool(&pf_pabuf, &newrule->rpool.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_FASTROUTE)) && - !newrule->anchorname[0])) && + !pcr->anchor[0])) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; @@ -1222,8 +1484,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } pf_empty_pool(&pf_pabuf); - s = splsoftnet(); - if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); @@ -1239,7 +1499,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule != NULL) pf_rm_rule(NULL, newrule); error = EINVAL; - splx(s); break; } } @@ -1265,12 +1524,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; + ruleset->rules[rs_num].active.ticket++; + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); - pf_update_anchor_rules(); - ruleset->rules[rs_num].active.ticket++; - splx(s); break; } @@ -1279,7 +1537,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - s = splsoftnet(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name)) { @@ -1297,7 +1554,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif - splx(s); break; } @@ -1306,16 +1562,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - s = splsoftnet(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { if ((!psk->psk_af || state->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == state->proto) && - PF_MATCHA(psk->psk_src.not, + PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, &state->lan.addr, state->af) && - PF_MATCHA(psk->psk_dst.not, + PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, &state->ext.addr, state->af) && @@ -1334,7 +1589,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } pf_purge_expired_states(); - splx(s); psk->psk_af = killed; break; } @@ -1354,12 +1608,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENOMEM; break; } - s = splsoftnet(); kif = pfi_lookup_create(ps->state.u.ifname); if (kif == NULL) { pool_put(&pf_state_pl, state); error = ENOENT; - splx(s); break; } bcopy(&ps->state, state, sizeof(struct pf_state)); @@ -1368,7 +1620,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) state->nat_rule.ptr = NULL; state->anchor.ptr = NULL; state->rt_kif = NULL; - state->creation = time.tv_sec; + state->creation = time_second; state->pfsync_time = 0; state->packets[0] = state->packets[1] = 0; state->bytes[0] = state->bytes[1] = 0; @@ -1378,7 +1630,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pool_put(&pf_state_pl, state); error = ENOMEM; } - splx(s); break; } @@ -1388,7 +1639,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int32_t nr; nr = 0; - s = splsoftnet(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { if (nr >= ps->nr) break; @@ -1396,7 +1646,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (state == NULL) { error = EBUSY; - splx(s); break; } bcopy(state, &ps->state, sizeof(struct pf_state)); @@ -1405,10 +1654,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) -1 : state->nat_rule.ptr->nr; ps->state.anchor.nr = (state->anchor.ptr == NULL) ? -1 : state->anchor.ptr->nr; - splx(s); ps->state.expire = pf_state_expires(state); - if (ps->state.expire > time.tv_sec) - ps->state.expire -= time.tv_sec; + if (ps->state.expire > time_second) + ps->state.expire -= time_second; else ps->state.expire = 0; break; @@ -1423,20 +1671,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int space = ps->ps_len; if (space == 0) { - s = splsoftnet(); TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) nr += kif->pfik_states; - splx(s); ps->ps_len = sizeof(struct pf_state) * nr; - return (0); + break; } - s = splsoftnet(); p = ps->ps_states; TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) RB_FOREACH(state, pf_state_tree_ext_gwy, &kif->pfik_ext_gwy) { - int secs = time.tv_sec; + int secs = time_second; if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; @@ -1456,15 +1701,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pstore.expire = 0; error = copyout(&pstore, p, sizeof(*p)); - if (error) { - splx(s); + if (error) goto fail; - } p++; nr++; } ps->ps_len = sizeof(struct pf_state) * nr; - splx(s); break; } @@ -1515,8 +1757,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) !pnl->dport || !pnl->sport) error = EINVAL; else { - s = splsoftnet(); - /* * userland gives us source and dest of connection, * reverse the lookup so we ask for what happens with @@ -1556,7 +1796,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } else error = ENOENT; - splx(s); } break; } @@ -1629,88 +1868,48 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; - s = splsoftnet(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) rule->evaluations = rule->packets = rule->bytes = 0; - splx(s); break; } #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; - struct ifnet *ifp; - struct tb_profile tb; /* enable all altq interfaces on active list */ - s = splsoftnet(); TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { - if ((ifp = ifunit(altq->ifname)) == NULL) { - error = EINVAL; - break; - } - if (ifp->if_snd.altq_type != ALTQT_NONE) - error = altq_enable(&ifp->if_snd); - if (error != 0) - break; - /* set tokenbucket regulator */ - tb.rate = altq->ifbandwidth; - tb.depth = altq->tbrsize; - error = tbr_set(&ifp->if_snd, &tb); + error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) - pfaltq_running = 1; - splx(s); + pf_altq_running = 1; DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; - struct ifnet *ifp; - struct tb_profile tb; - int err; /* disable all altq interfaces on active list */ - s = splsoftnet(); TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { - if ((ifp = ifunit(altq->ifname)) == NULL) { - error = EINVAL; + error = pf_disable_altq(altq); + if (error != 0) break; - } - if (ifp->if_snd.altq_type != ALTQT_NONE) { - err = altq_disable(&ifp->if_snd); - if (err != 0 && error == 0) - error = err; - } - /* clear tokenbucket regulator */ - tb.rate = 0; - err = tbr_set(&ifp->if_snd, &tb); - if (err != 0 && error == 0) - error = err; } } if (error == 0) - pfaltq_running = 0; - splx(s); + pf_altq_running = 0; DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } - case DIOCBEGINALTQS: { - u_int32_t *ticket = (u_int32_t *)addr; - - error = pf_begin_altq(ticket); - break; - } - case DIOCADDALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; @@ -1756,23 +1955,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - case DIOCCOMMITALTQS: { - u_int32_t ticket = *(u_int32_t *)addr; - - error = pf_commit_altq(ticket); - break; - } - case DIOCGETALTQS: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; pa->nr = 0; - s = splsoftnet(); TAILQ_FOREACH(altq, pf_altqs_active, entries) pa->nr++; pa->ticket = ticket_altqs_active; - splx(s); break; } @@ -1786,7 +1976,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } nr = 0; - s = splsoftnet(); altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); @@ -1794,11 +1983,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (altq == NULL) { error = EBUSY; - splx(s); break; } bcopy(altq, &pa->altq, sizeof(struct pf_altq)); - splx(s); break; } @@ -1819,7 +2006,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } nbytes = pq->nbytes; nr = 0; - s = splsoftnet(); altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); @@ -1827,11 +2013,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (altq == NULL) { error = EBUSY; - splx(s); break; } error = altq_getqstats(altq, pq->buf, &nbytes); - splx(s); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; @@ -1898,17 +2082,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; pp->nr = 0; - s = splsoftnet(); - pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, - pp->r_action, pp->r_num, 0, 1, 0); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 0); if (pool == NULL) { error = EBUSY; - splx(s); break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; - splx(s); break; } @@ -1916,12 +2097,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; u_int32_t nr = 0; - s = splsoftnet(); - pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, - pp->r_action, pp->r_num, 0, 1, 1); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 1); if (pool == NULL) { error = EBUSY; - splx(s); break; } pa = TAILQ_FIRST(&pool->list); @@ -1931,13 +2110,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (pa == NULL) { error = EBUSY; - splx(s); break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pfi_dynaddr_copyout(&pp->addr.addr); pf_tbladdr_copyout(&pp->addr.addr); - splx(s); + pf_rtlabel_copyout(&pp->addr.addr); break; } @@ -1958,13 +2136,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - ruleset = pf_find_ruleset(pca->anchor, pca->ruleset); + ruleset = pf_find_ruleset(pca->anchor); if (ruleset == NULL) { error = EBUSY; break; } - pool = pf_get_pool(pca->anchor, pca->ruleset, pca->ticket, - pca->r_action, pca->r_num, pca->r_last, 1, 1); + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); if (pool == NULL) { error = EBUSY; break; @@ -2009,8 +2187,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } - s = splsoftnet(); - if (pca->action == PF_CHANGE_ADD_HEAD) oldpa = TAILQ_FIRST(&pool->list); else if (pca->action == PF_CHANGE_ADD_TAIL) @@ -2025,7 +2201,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (oldpa == NULL) { error = EINVAL; - splx(s); break; } } @@ -2050,72 +2225,64 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); - splx(s); - break; - } - - case DIOCGETANCHORS: { - struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; - struct pf_anchor *anchor; - - pa->nr = 0; - TAILQ_FOREACH(anchor, &pf_anchors, entries) - pa->nr++; - break; - } - - case DIOCGETANCHOR: { - struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; - struct pf_anchor *anchor; - u_int32_t nr = 0; - - anchor = TAILQ_FIRST(&pf_anchors); - while (anchor != NULL && nr < pa->nr) { - anchor = TAILQ_NEXT(anchor, entries); - nr++; - } - if (anchor == NULL) - error = EBUSY; - else - bcopy(anchor->name, pa->name, sizeof(pa->name)); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - struct pf_anchor *anchor; struct pf_ruleset *ruleset; + struct pf_anchor *anchor; - pr->anchor[PF_ANCHOR_NAME_SIZE-1] = 0; - if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } pr->nr = 0; - TAILQ_FOREACH(ruleset, &anchor->rulesets, entries) - pr->nr++; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL) + pr->nr++; + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + pr->nr++; + } break; } case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - struct pf_anchor *anchor; struct pf_ruleset *ruleset; + struct pf_anchor *anchor; u_int32_t nr = 0; - if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } - ruleset = TAILQ_FIRST(&anchor->rulesets); - while (ruleset != NULL && nr < pr->nr) { - ruleset = TAILQ_NEXT(ruleset, entries); - nr++; + pr->name[0] = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL && nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + if (nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } } - if (ruleset == NULL) + if (!pr->name[0]) error = EBUSY; - else - bcopy(ruleset->name, pr->name, sizeof(pr->name)); break; } @@ -2306,31 +2473,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - case DIOCRINABEGIN: { - struct pfioc_table *io = (struct pfioc_table *)addr; - - if (io->pfrio_esize != 0) { - error = ENODEV; - break; - } - error = pfr_ina_begin(&io->pfrio_table, &io->pfrio_ticket, - &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); - break; - } - - case DIOCRINACOMMIT: { - struct pfioc_table *io = (struct pfioc_table *)addr; - - if (io->pfrio_esize != 0) { - error = ENODEV; - break; - } - error = pfr_ina_commit(&io->pfrio_table, io->pfrio_ticket, - &io->pfrio_nadd, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); - break; - } - case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; @@ -2346,25 +2488,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; - s = splsoftnet(); error = pf_osfp_add(io); - splx(s); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; - s = splsoftnet(); error = pf_osfp_get(io); - splx(s); break; } case DIOCXBEGIN: { - struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *) + addr; + static struct pfioc_trans_e ioe; + static struct pfr_table table; + int i; if (io->esize != sizeof(ioe)) { error = ENODEV; @@ -2378,7 +2517,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) switch (ioe.rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe.anchor[0]) { error = EINVAL; goto fail; } @@ -2390,15 +2529,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe.anchor, sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); if ((error = pfr_ina_begin(&table, &ioe.ticket, NULL, 0))) goto fail; break; default: if ((error = pf_begin_rules(&ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + ioe.rs_num, ioe.anchor))) goto fail; break; } @@ -2411,10 +2548,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCXROLLBACK: { - struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *) + addr; + static struct pfioc_trans_e ioe; + static struct pfr_table table; + int i; if (io->esize != sizeof(ioe)) { error = ENODEV; @@ -2428,7 +2566,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) switch (ioe.rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe.anchor[0]) { error = EINVAL; goto fail; } @@ -2440,15 +2578,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe.anchor, sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); if ((error = pfr_ina_rollback(&table, ioe.ticket, NULL, 0))) goto fail; /* really bad */ break; default: if ((error = pf_rollback_rules(ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + ioe.rs_num, ioe.anchor))) goto fail; /* really bad */ break; } @@ -2457,11 +2593,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCXCOMMIT: { - struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; - struct pf_ruleset *rs; - int i; + struct pfioc_trans *io = (struct pfioc_trans *) + addr; + static struct pfioc_trans_e ioe; + static struct pfr_table table; + struct pf_ruleset *rs; + int i; if (io->esize != sizeof(ioe)) { error = ENODEV; @@ -2476,7 +2613,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) switch (ioe.rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe.anchor[0]) { error = EINVAL; goto fail; } @@ -2488,7 +2625,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; #endif /* ALTQ */ case PF_RULESET_TABLE: - rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); + rs = pf_find_ruleset(ioe.anchor); if (rs == NULL || !rs->topen || ioe.ticket != rs->tticket) { error = EBUSY; @@ -2501,7 +2638,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } - rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); + rs = pf_find_ruleset(ioe.anchor); if (rs == NULL || !rs->rules[ioe.rs_num].inactive.open || rs->rules[ioe.rs_num].inactive.ticket != @@ -2529,15 +2666,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe.anchor, sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); if ((error = pfr_ina_commit(&table, ioe.ticket, NULL, NULL, 0))) goto fail; /* really bad */ break; default: if ((error = pf_commit_rules(ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + ioe.rs_num, ioe.anchor))) goto fail; /* really bad */ break; } @@ -2553,18 +2688,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int space = psn->psn_len; if (space == 0) { - s = splsoftnet(); RB_FOREACH(n, pf_src_tree, &tree_src_tracking) nr++; - splx(s); psn->psn_len = sizeof(struct pf_src_node) * nr; - return (0); + break; } - s = splsoftnet(); p = psn->psn_src_nodes; RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { - int secs = time.tv_sec; + int secs = time_second, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; @@ -2577,16 +2709,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pstore.expire -= secs; else pstore.expire = 0; + + /* adjust the connection rate estimate */ + diff = secs - n->conn_rate.last; + if (diff >= n->conn_rate.seconds) + pstore.conn_rate.count = 0; + else + pstore.conn_rate.count -= + n->conn_rate.count * diff / + n->conn_rate.seconds; + error = copyout(&pstore, p, sizeof(*p)); - if (error) { - splx(s); + if (error) goto fail; - } p++; nr++; } psn->psn_len = sizeof(struct pf_src_node) * nr; - splx(s); break; } @@ -2594,7 +2733,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_src_node *n; struct pf_state *state; - s = splsoftnet(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { state->src_node = NULL; state->nat_src_node = NULL; @@ -2605,25 +2743,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } pf_purge_expired_src_nodes(); pf_status.src_nodes = 0; - splx(s); break; } case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; - if (*hostid == 0) { - error = EINVAL; - goto fail; - } - pf_status.hostid = *hostid; + if (*hostid == 0) + pf_status.hostid = arc4random(); + else + pf_status.hostid = *hostid; break; } case DIOCOSFPFLUSH: - s = splsoftnet(); pf_osfp_flush(); - splx(s); break; case DIOCIGETIFACES: { @@ -2646,11 +2780,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } + case DIOCSETIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); + break; + } + + case DIOCCLRIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + break; + } + default: error = ENODEV; break; } fail: - + splx(s); return (error); } diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 3e5b40281db0..3c3dbc062ca4 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */ -/* add $OpenBSD: pf_norm.c,v 1.87 2004/05/11 07:34:11 dhartmei Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ /* * Copyright 2001 Niels Provos @@ -118,8 +117,12 @@ struct mbuf *pf_fragcache(struct mbuf **, struct ip*, int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int); -#define DPFPRINTF(x) if (pf_status.debug >= PF_DEBUG_MISC) \ - { printf("%s: ", __func__); printf x ;} +#define DPFPRINTF(x) do { \ + if (pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while(0) /* Globals */ struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; @@ -173,7 +176,7 @@ void pf_purge_expired_fragments(void) { struct pf_fragment *frag; - u_int32_t expire = time.tv_sec - + u_int32_t expire = time_second - pf_default_rule.timeout[PFTM_FRAG]; while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { @@ -284,7 +287,7 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) frag = RB_FIND(pf_frag_tree, tree, &key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ - frag->fr_timeout = time.tv_sec; + frag->fr_timeout = time_second; if (BUFFER_FRAGMENTS(frag)) { TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); @@ -349,7 +352,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, (*frag)->fr_dst = frent->fr_ip->ip_dst; (*frag)->fr_p = frent->fr_ip->ip_p; (*frag)->fr_id = frent->fr_ip->ip_id; - (*frag)->fr_timeout = time.tv_sec; + (*frag)->fr_timeout = time_second; LIST_INIT(&(*frag)->fr_queue); RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); @@ -551,7 +554,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, (*frag)->fr_dst = h->ip_dst; (*frag)->fr_p = h->ip_p; (*frag)->fr_id = h->ip_id; - (*frag)->fr_timeout = time.tv_sec; + (*frag)->fr_timeout = time_second; cur->fr_off = off; cur->fr_end = max; @@ -810,7 +813,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, } int -pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, + struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; @@ -837,10 +841,10 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.not)) + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.not)) + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; @@ -982,6 +986,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) h->ip_id = ip_randomid(); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); @@ -989,7 +995,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) /* Enforce a minimum ttl, may cause endless packet loops */ if (r->min_ttl && h->ip_ttl < r->min_ttl) h->ip_ttl = r->min_ttl; - + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); no_mem: @@ -1021,7 +1028,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) #ifdef INET6 int pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, - u_short *reason) + u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; @@ -1053,10 +1060,10 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.not)) + (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.not)) + (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; @@ -1171,6 +1178,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, goto badfrag; /* do something about it */ + /* remember to set pd->flags |= PFDESC_IP_REAS */ return (PF_PASS); shortpkt: @@ -1191,7 +1199,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); return (PF_DROP); } -#endif +#endif /* INET6 */ int pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, @@ -1216,12 +1224,12 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -1236,7 +1244,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, } } - if (rm == NULL) + if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); else r->packets++; @@ -1309,6 +1317,7 @@ int pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) { + u_int32_t tsval, tsecr; u_int8_t hdr[60]; u_int8_t *opt; @@ -1362,7 +1371,18 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, if (opt[1] >= TCPOLEN_TIMESTAMP) { src->scrub->pfss_flags |= PFSS_TIMESTAMP; - src->scrub->pfss_ts_mod = arc4random(); + src->scrub->pfss_ts_mod = + htonl(arc4random()); + + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); } /* FALLTHROUGH */ default: @@ -1389,12 +1409,16 @@ pf_normalize_tcp_cleanup(struct pf_state *state) int pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, - u_short *reason, struct tcphdr *th, struct pf_state_peer *src, - struct pf_state_peer *dst, int *writeback) + u_short *reason, struct tcphdr *th, struct pf_state *state, + struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) { + struct timeval uptime; + u_int32_t tsval, tsecr; + u_int tsval_from_last; u_int8_t hdr[60]; u_int8_t *opt; int copyback = 0; + int got_ts = 0; KASSERT(src->scrub || dst->scrub); @@ -1448,32 +1472,46 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * NAT detection, OS uptime determination or * reboot detection. */ + + if (got_ts) { + /* Huh? Multiple timestamps!? */ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("multiple TS??")); + pf_print_state(state); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } if (opt[1] >= TCPOLEN_TIMESTAMP) { - u_int32_t ts_value; - if (src->scrub && + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + if (tsval && src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { - memcpy(&ts_value, &opt[2], - sizeof(u_int32_t)); - ts_value = htonl(ntohl(ts_value) - + src->scrub->pfss_ts_mod); + tsval = ntohl(tsval); pf_change_a(&opt[2], - &th->th_sum, ts_value, 0); + &th->th_sum, + htonl(tsval + + src->scrub->pfss_ts_mod), + 0); copyback = 1; } /* Modulate TS reply iff valid (!0) */ - memcpy(&ts_value, &opt[6], + memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); - if (ts_value && dst->scrub && + if (tsecr && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { - ts_value = htonl(ntohl(ts_value) - - dst->scrub->pfss_ts_mod); + tsecr = ntohl(tsecr) + - dst->scrub->pfss_ts_mod; pf_change_a(&opt[6], - &th->th_sum, ts_value, 0); + &th->th_sum, htonl(tsecr), + 0); copyback = 1; } + got_ts = 1; } /* FALLTHROUGH */ default: @@ -1492,9 +1530,274 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } + /* + * Must invalidate PAWS checks on connections idle for too long. + * The fastest allowed timestamp clock is 1ms. That turns out to + * be about 24 days before it wraps. XXX Right now our lowerbound + * TS echo check only works for the first 12 days of a connection + * when the TS has exhausted half its 32bit space + */ +#define TS_MAX_IDLE (24*24*60*60) +#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ + + getmicrouptime(&uptime); + if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && + (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || + time_second - state->creation > TS_MAX_CONN)) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("src idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && + uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("dst idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + + if (got_ts && src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Validate that the timestamps are "in-window". + * RFC1323 describes TCP Timestamp options that allow + * measurement of RTT (round trip time) and PAWS + * (protection against wrapped sequence numbers). PAWS + * gives us a set of rules for rejecting packets on + * long fat pipes (packets that were somehow delayed + * in transit longer than the time it took to send the + * full TCP sequence space of 4Gb). We can use these + * rules and infer a few others that will let us treat + * the 32bit timestamp and the 32bit echoed timestamp + * as sequence numbers to prevent a blind attacker from + * inserting packets into a connection. + * + * RFC1323 tells us: + * - The timestamp on this packet must be greater than + * or equal to the last value echoed by the other + * endpoint. The RFC says those will be discarded + * since it is a dup that has already been acked. + * This gives us a lowerbound on the timestamp. + * timestamp >= other last echoed timestamp + * - The timestamp will be less than or equal to + * the last timestamp plus the time between the + * last packet and now. The RFC defines the max + * clock rate as 1ms. We will allow clocks to be + * up to 10% fast and will allow a total difference + * or 30 seconds due to a route change. And this + * gives us an upperbound on the timestamp. + * timestamp <= last timestamp + max ticks + * We have to be careful here. Windows will send an + * initial timestamp of zero and then initialize it + * to a random value after the 3whs; presumably to + * avoid a DoS by having to call an expensive RNG + * during a SYN flood. Proof MS has at least one + * good security geek. + * + * - The TCP timestamp option must also echo the other + * endpoints timestamp. The timestamp echoed is the + * one carried on the earliest unacknowledged segment + * on the left edge of the sequence window. The RFC + * states that the host will reject any echoed + * timestamps that were larger than any ever sent. + * This gives us an upperbound on the TS echo. + * tescr <= largest_tsval + * - The lowerbound on the TS echo is a little more + * tricky to determine. The other endpoint's echoed + * values will not decrease. But there may be + * network conditions that re-order packets and + * cause our view of them to decrease. For now the + * only lowerbound we can safely determine is that + * the TS echo will never be less than the orginal + * TS. XXX There is probably a better lowerbound. + * Remove TS_MAX_CONN with better lowerbound check. + * tescr >= other original TS + * + * It is also important to note that the fastest + * timestamp clock of 1ms will wrap its 32bit space in + * 24 days. So we just disable TS checking after 24 + * days of idle time. We actually must use a 12d + * connection limit until we can come up with a better + * lowerbound to the TS echo check. + */ + struct timeval delta_ts; + int ts_fudge; + + + /* + * PFTM_TS_DIFF is how many seconds of leeway to allow + * a host's timestamp. This can happen if the previous + * packet got delayed in transit for much longer than + * this packet. + */ + if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) + ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; + + + /* Calculate max ticks since the last timestamp */ +#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ +#define TS_MICROSECS 1000000 /* microseconds per second */ + timersub(&uptime, &src->scrub->pfss_last, &delta_ts); + tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; + tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); + + + if ((src->state >= TCPS_ESTABLISHED && + dst->state >= TCPS_ESTABLISHED) && + (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || + SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || + (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || + SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { + /* Bad RFC1323 implementation or an insertion attack. + * + * - Solaris 2.6 and 2.7 are known to send another ACK + * after the FIN,FIN|ACK,ACK closing that carries + * an old timestamp. + */ + + DPFPRINTF(("Timestamp failed %c%c%c%c\n", + SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', + SEQ_GT(tsval, src->scrub->pfss_tsval + + tsval_from_last) ? '1' : ' ', + SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); + DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu " + "idle: %lus %lums\n", + tsval, tsecr, tsval_from_last, delta_ts.tv_sec, + delta_ts.tv_usec / 1000)); + DPFPRINTF((" src->tsval: %lu tsecr: %lu\n", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); + DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu" + "\n", dst->scrub->pfss_tsval, + dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); + if (pf_status.debug >= PF_DEBUG_MISC) { + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + + /* XXX I'd really like to require tsecr but it's optional */ + + } else if (!got_ts && (th->th_flags & TH_RST) == 0 && + ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) + || pd->p_len > 0 || (th->th_flags & TH_SYN)) && + src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Didn't send a timestamp. Timestamps aren't really useful + * when: + * - connection opening or closing (often not even sent). + * but we must not let an attacker to put a FIN on a + * data packet to sneak it through our ESTABLISHED check. + * - on a TCP reset. RFC suggests not even looking at TS. + * - on an empty ACK. The TS will not be echoed so it will + * probably not help keep the RTT calculation in sync and + * there isn't as much danger when the sequence numbers + * got wrapped. So some stacks don't include TS on empty + * ACKs :-( + * + * To minimize the disruption to mostly RFC1323 conformant + * stacks, we will only require timestamps on data packets. + * + * And what do ya know, we cannot require timestamps on data + * packets. There appear to be devices that do legitimate + * TCP connection hijacking. There are HTTP devices that allow + * a 3whs (with timestamps) and then buffer the HTTP request. + * If the intermediate device has the HTTP response cache, it + * will spoof the response but not bother timestamping its + * packets. So we can look for the presence of a timestamp in + * the first data packet and if there, require it in all future + * packets. + */ + + if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { + /* + * Hey! Someone tried to sneak a packet in. Or the + * stack changed its RFC1323 behavior?!?! + */ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("Did not receive expected RFC1323 " + "timestamp\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + } + + + /* + * We will note if a host sends his data packets with or without + * timestamps. And require all data packets to contain a timestamp + * if the first does. PAWS implicitly requires that all data packets be + * timestamped. But I think there are middle-man devices that hijack + * TCP streams immedietly after the 3whs and don't timestamp their + * packets (seen in a WWW accelerator or cache). + */ + if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & + (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) + src->scrub->pfss_flags |= PFSS_DATA_TS; + else { + src->scrub->pfss_flags |= PFSS_DATA_NOTS; + if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* Don't warn if other host rejected RFC1323 */ + DPFPRINTF(("Broken RFC1323 stack did not " + "timestamp data packet. Disabled PAWS " + "security.\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + } + } + + + /* + * Update PAWS values + */ + if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & + (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + getmicrouptime(&src->scrub->pfss_last); + if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsval = tsval; + + if (tsecr) { + if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsecr = tsecr; + + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && + (SEQ_LT(tsval, src->scrub->pfss_tsval0) || + src->scrub->pfss_tsval0 == 0)) { + /* tsval0 MUST be the lowest timestamp */ + src->scrub->pfss_tsval0 = tsval; + } + + /* Only fully initialized after a TS gets echoed */ + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_flags |= PFSS_PAWS; + } + } + /* I have a dream.... TCP segment reassembly.... */ return (0); } + int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int off) diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index 98cc01ab1fb6..b2adcf29667d 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_osfp.c,v 1.9 2004/01/04 20:08:42 pvalchev Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.10 2004/04/09 19:30:41 frantzen Exp $ */ /* * Copyright (c) 2003 Mike Frantzen @@ -233,9 +233,9 @@ void pf_osfp_initialize(void) { pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, - "pfosfpen", NULL); + "pfosfpen", &pool_allocator_nointr); pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, - "pfosfp", NULL); + "pfosfp", &pool_allocator_nointr); SLIST_INIT(&pf_osfp_list); } diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index 42d4cee0b7c3..621809a3b0e4 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.47 2004/03/09 21:44:41 mcbride Exp $ */ +/* $OpenBSD: pf_table.c,v 1.62 2004/12/07 18:02:04 mcbride Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -125,6 +125,7 @@ struct pfr_walktree { struct pool pfr_ktable_pl; struct pool pfr_kentry_pl; +struct pool pfr_kentry_pl2; struct sockaddr_in pfr_sin; struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; @@ -138,7 +139,7 @@ void pfr_enqueue_addrs(struct pfr_ktable *, void pfr_mark_addrs(struct pfr_ktable *); struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, struct pfr_addr *, int); -struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); void pfr_destroy_kentries(struct pfr_kentryworkq *); void pfr_destroy_kentry(struct pfr_kentry *); void pfr_insert_kentries(struct pfr_ktable *, @@ -155,6 +156,7 @@ int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); int pfr_walktree(struct radix_node *, void *); int pfr_validate_table(struct pfr_table *, int, int); +int pfr_fix_anchor(char *); void pfr_commit_ktable(struct pfr_ktable *, long); void pfr_insert_ktables(struct pfr_ktableworkq *); void pfr_insert_ktable(struct pfr_ktable *); @@ -187,9 +189,11 @@ void pfr_initialize(void) { pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", NULL); + "pfrktable", &pool_allocator_oldnointr); pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", NULL); + "pfrkentry", &pool_allocator_oldnointr); + pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0, + "pfrkentry2", NULL); pfr_sin.sin_len = sizeof(pfr_sin); pfr_sin.sin_family = AF_INET; @@ -240,7 +244,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentry *p, *q; struct pfr_addr ad; int i, rv, s, xadd = 0; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) @@ -272,7 +276,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -317,7 +321,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; - int i, rv, s, xdel = 0; + int i, rv, s, xdel = 0, log = 1; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) @@ -327,7 +331,34 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - pfr_mark_addrs(kt); + /* + * there are two algorithms to choose from here. + * with: + * n: number of addresses to delete + * N: number of addresses in the table + * + * one is O(N) and is better for large 'n' + * one is O(n*LOG(N)) and is better for small 'n' + * + * following code try to decide which one is best. + */ + for (i = kt->pfrkt_cnt; i > 0; i >>= 1) + log++; + if (size > kt->pfrkt_cnt/log) { + /* full table scan */ + pfr_mark_addrs(kt); + } else { + /* iterate over addresses to delete */ + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + return (EFAULT); + if (pfr_validate_addr(&ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) + p->pfrke_mark = 0; + } + } SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) @@ -380,7 +411,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentry *p, *q; struct pfr_addr ad; int i, rv, s, xadd = 0, xdel = 0, xchange = 0; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) @@ -421,7 +452,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -568,7 +599,7 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, struct pfr_walktree w; struct pfr_kentryworkq workq; int rv, s; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ if (pfr_validate_table(tbl, 0, 0)) @@ -666,14 +697,18 @@ pfr_validate_addr(struct pfr_addr *ad) int i; switch (ad->pfra_af) { +#ifdef INET case AF_INET: if (ad->pfra_net > 32) return (-1); break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: if (ad->pfra_net > 128) return (-1); break; +#endif /* INET6 */ default: return (-1); } @@ -736,7 +771,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) if (ad->pfra_af == AF_INET) { FILLIN_SIN(sa.sin, ad->pfra_ip4addr); head = kt->pfrkt_ip4; - } else { + } else if ( ad->pfra_af == AF_INET6 ) { FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); head = kt->pfrkt_ip6; } @@ -758,22 +793,26 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) } struct pfr_kentry * -pfr_create_kentry(struct pfr_addr *ad) +pfr_create_kentry(struct pfr_addr *ad, int intr) { struct pfr_kentry *ke; - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + if (intr) + ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); + else + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); if (ke == NULL) return (NULL); bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); - else + else if (ad->pfra_af == AF_INET6) FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); ke->pfrke_af = ad->pfra_af; ke->pfrke_net = ad->pfra_net; ke->pfrke_not = ad->pfra_not; + ke->pfrke_intrpool = intr; return (ke); } @@ -791,7 +830,10 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) void pfr_destroy_kentry(struct pfr_kentry *ke) { - pool_put(&pfr_kentry_pl, ke); + if (ke->pfrke_intrpool) + pool_put(&pfr_kentry_pl2, ke); + else + pool_put(&pfr_kentry_pl, ke); } void @@ -814,6 +856,29 @@ pfr_insert_kentries(struct pfr_ktable *kt, kt->pfrkt_cnt += n; } +int +pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) +{ + struct pfr_kentry *p; + int rv; + + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + return (0); + p = pfr_create_kentry(ad, 1); + if (p == NULL) + return (EINVAL); + + rv = pfr_route_kentry(kt, p); + if (rv) + return (rv); + + p->pfrke_tzero = tzero; + kt->pfrkt_cnt++; + + return (0); +} + void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) @@ -880,14 +945,14 @@ pfr_prepare_network(union sockaddr_union *sa, int af, int net) if (af == AF_INET) { sa->sin.sin_len = sizeof(sa->sin); sa->sin.sin_family = AF_INET; - sa->sin.sin_addr.s_addr = htonl(-1 << (32-net)); - } else { + sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0; + } else if (af == AF_INET6) { sa->sin6.sin6_len = sizeof(sa->sin6); sa->sin6.sin6_family = AF_INET6; for (i = 0; i < 4; i++) { if (net <= 32) { sa->sin6.sin6_addr.s6_addr32[i] = - htonl(-1 << (32-net)); + net ? htonl(-1 << (32-net)) : 0; break; } sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; @@ -907,7 +972,7 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; - else + else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; s = splsoftnet(); @@ -931,15 +996,15 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; - else + else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; s = splsoftnet(); if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); - rn = rn_delete(&ke->pfrke_sa, &mask, head); + rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL); } else - rn = rn_delete(&ke->pfrke_sa, NULL, head); + rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL); splx(s); if (rn == NULL) { @@ -960,7 +1025,7 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) ad->pfra_not = ke->pfrke_not; if (ad->pfra_af == AF_INET) ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; - else + else if (ad->pfra_af == AF_INET6) ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; } @@ -1029,7 +1094,7 @@ pfr_walktree(struct radix_node *rn, void *arg) &ke->pfrke_sa, AF_INET); w->pfrw_dyn->pfid_mask4 = *SUNION2PF( &pfr_mask, AF_INET); - } else { + } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); @@ -1051,6 +1116,8 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) int s, xdel = 0; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); if (pfr_table_count(filter, flags) < 0) return (ENOENT); @@ -1084,7 +1151,7 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; int i, rv, s, xadd = 0; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); SLIST_INIT(&addq); @@ -1112,7 +1179,6 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) /* find or create root table */ bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); - bzero(key.pfrkt_ruleset, sizeof(key.pfrkt_ruleset)); r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (r != NULL) { p->pfrkt_root = r; @@ -1207,6 +1273,8 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, int n, nn; ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); @@ -1237,10 +1305,12 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, struct pfr_ktable *p; struct pfr_ktableworkq workq; int s, n, nn; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); /* XXX PFR_FLAG_CLSTATS disabled */ + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); @@ -1285,7 +1355,7 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) struct pfr_ktableworkq workq; struct pfr_ktable *p, key; int i, s, xzero = 0; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); @@ -1375,7 +1445,7 @@ pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); - rs = pf_find_or_create_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor); if (rs == NULL) return (ENOMEM); SLIST_INIT(&workq); @@ -1417,7 +1487,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); - rs = pf_find_ruleset(tbl->pfrt_anchor, tbl->pfrt_ruleset); + rs = pf_find_ruleset(tbl->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; @@ -1463,7 +1533,7 @@ _skip: senderr(EINVAL); if (pfr_lookup_addr(shadow, &ad, 1) != NULL) continue; - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(shadow, p)) { @@ -1508,7 +1578,7 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); - rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (0); SLIST_INIT(&workq); @@ -1534,14 +1604,14 @@ int pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, int *nchange, int flags) { - struct pfr_ktable *p; + struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; int s, xadd = 0, xchange = 0; - long tzero = time.tv_sec; + long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); - rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1560,8 +1630,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); - SLIST_FOREACH(p, &workq, pfrkt_workq) + for (p = SLIST_FIRST(&workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); pfr_commit_ktable(p, tzero); + } if (flags & PFR_FLAG_ATOMIC) splx(s); rs->topen = 0; @@ -1648,27 +1720,52 @@ pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) if (tbl->pfrt_name[i]) return (-1); + if (pfr_fix_anchor(tbl->pfrt_anchor)) + return (-1); if (tbl->pfrt_flags & ~allowedflags) return (-1); return (0); } +/* + * Rewrite anchors referenced by tables to remove slashes + * and check for validity. + */ +int +pfr_fix_anchor(char *anchor) +{ + size_t siz = MAXPATHLEN; + int i; + + if (anchor[0] == '/') { + char *path; + int off; + + path = anchor; + off = 1; + while (*++path == '/') + off++; + bcopy(path, anchor, siz - off); + memset(anchor + siz - off, 0, off); + } + if (anchor[siz - 1]) + return (-1); + for (i = strlen(anchor); i < siz; i++) + if (anchor[i]) + return (-1); + return (0); +} + int pfr_table_count(struct pfr_table *filter, int flags) { struct pf_ruleset *rs; - struct pf_anchor *ac; if (flags & PFR_FLAG_ALLRSETS) return (pfr_ktable_cnt); - if (filter->pfrt_ruleset[0]) { - rs = pf_find_ruleset(filter->pfrt_anchor, - filter->pfrt_ruleset); - return ((rs != NULL) ? rs->tables : -1); - } if (filter->pfrt_anchor[0]) { - ac = pf_find_anchor(filter->pfrt_anchor); - return ((ac != NULL) ? ac->tables : -1); + rs = pf_find_ruleset(filter->pfrt_anchor); + return ((rs != NULL) ? rs->tables : -1); } return (pf_main_ruleset.tables); } @@ -1678,13 +1775,7 @@ pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) { if (flags & PFR_FLAG_ALLRSETS) return (0); - if (strncmp(filter->pfrt_anchor, kt->pfrkt_anchor, - PF_ANCHOR_NAME_SIZE)) - return (1); - if (!filter->pfrt_ruleset[0]) - return (0); - if (strncmp(filter->pfrt_ruleset, kt->pfrkt_ruleset, - PF_RULESET_NAME_SIZE)) + if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor)) return (1); return (0); } @@ -1712,10 +1803,12 @@ pfr_insert_ktable(struct pfr_ktable *kt) void pfr_setflags_ktables(struct pfr_ktableworkq *workq) { - struct pfr_ktable *p; + struct pfr_ktable *p, *q; - SLIST_FOREACH(p, workq, pfrkt_workq) + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); pfr_setflags_ktable(p, p->pfrkt_nflags); + } } void @@ -1790,16 +1883,13 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) kt->pfrkt_t = *tbl; if (attachruleset) { - rs = pf_find_or_create_ruleset(tbl->pfrt_anchor, - tbl->pfrt_ruleset); + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); if (!rs) { pfr_destroy_ktable(kt, 0); return (NULL); } kt->pfrkt_rs = rs; rs->tables++; - if (rs->anchor != NULL) - rs->anchor->tables++; } if (!rn_inithead((void **)&kt->pfrkt_ip4, @@ -1843,8 +1933,6 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); if (kt->pfrkt_rs != NULL) { kt->pfrkt_rs->tables--; - if (kt->pfrkt_rs->anchor != NULL) - kt->pfrkt_rs->anchor->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } pool_put(&pfr_ktable_pl, kt); @@ -1857,11 +1945,7 @@ pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) return (d); - if ((d = strncmp(p->pfrkt_anchor, q->pfrkt_anchor, - PF_ANCHOR_NAME_SIZE))) - return (d); - return (strncmp(p->pfrkt_ruleset, q->pfrkt_ruleset, - PF_RULESET_NAME_SIZE)); + return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); } struct pfr_ktable * @@ -1884,18 +1968,22 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) return (0); switch (af) { +#ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET6 */ } match = (ke && !ke->pfrke_not); if (match) @@ -1917,18 +2005,24 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, return; switch (af) { +#ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET6 */ + default: + ; } if ((ke == NULL || ke->pfrke_not) != notrule) { if (op_pass != PFR_OP_PASS) @@ -1952,18 +2046,15 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); - if (ac != NULL) { + if (ac != NULL) strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); - strlcpy(tbl.pfrt_ruleset, rs->name, sizeof(tbl.pfrt_ruleset)); - } kt = pfr_lookup_table(&tbl); if (kt == NULL) { - kt = pfr_create_ktable(&tbl, time.tv_sec, 1); + kt = pfr_create_ktable(&tbl, time_second, 1); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); - bzero(tbl.pfrt_ruleset, sizeof(tbl.pfrt_ruleset)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { rt = pfr_create_ktable(&tbl, 0, 1); @@ -2001,8 +2092,10 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, union sockaddr_union mask; int idx = -1, use_counter = 0; - addr = (af == AF_INET) ? (struct pf_addr *)&pfr_sin.sin_addr : - (struct pf_addr *)&pfr_sin6.sin6_addr; + if (af == AF_INET) + addr = (struct pf_addr *)&pfr_sin.sin_addr; + else if (af == AF_INET6) + addr = (struct pf_addr *)&pfr_sin6.sin6_addr; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2045,9 +2138,12 @@ _next_block: } for (;;) { /* we don't want to use a nested block */ - ke2 = (struct pfr_kentry *)(af == AF_INET ? - rn_match(&pfr_sin, kt->pfrkt_ip4) : - rn_match(&pfr_sin6, kt->pfrkt_ip6)); + if (af == AF_INET) + ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, + kt->pfrkt_ip4); + else if (af == AF_INET6) + ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, + kt->pfrkt_ip6); /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ @@ -2080,12 +2176,16 @@ pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) w.pfrw_cnt = idx; switch (af) { +#ifdef INET case AF_INET: rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); return (w.pfrw_kentry); +#endif /* INET */ +#ifdef INET6 case AF_INET6: rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); return (w.pfrw_kentry); +#endif /* INET6 */ default: return (NULL); } diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index 851d39591960..d27d01d2323b 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -1,5 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.187 2004/03/22 04:54:18 mcbride Exp $ */ -/* add $OpenBSD: pfvar.h,v 1.194 2004/05/11 07:34:11 dhartmei Exp $ */ +/* $OpenBSD: pfvar.h,v 1.213 2005/03/03 07:13:39 dhartmei Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -34,11 +33,13 @@ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ +#include #include #include #include #include +#include #include #include @@ -49,7 +50,7 @@ struct ip; enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; -enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NAT, PF_NONAT, +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; @@ -70,14 +71,35 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + +/* PFTM default values */ +#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ +#define PFTM_TCP_OPENING_VAL 30 /* No response yet */ +#define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */ +#define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */ +#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */ +#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */ +#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */ +#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */ +#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */ +#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */ +#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_FRAG_VAL 30 /* Fragment expire */ +#define PFTM_INTERVAL_VAL 10 /* Expire interval */ +#define PFTM_SRC_NODE_VAL 0 /* Source tracking */ +#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ + enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 @@ -114,6 +136,8 @@ struct pf_addr_wrap { } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; + char rtlabelname[RTLABEL_LEN]; + u_int32_t rtlabel; } v; union { struct pfi_dynaddr *dyn; @@ -275,10 +299,12 @@ struct pfi_dynaddr { #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, not) \ +#define PF_MISMATCHAW(aw, x, af, neg) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ pf_routable((x), (af))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ @@ -287,7 +313,7 @@ struct pfi_dynaddr { !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af)))) != \ - (not) \ + (neg) \ ) struct pf_rule_uid { @@ -303,7 +329,7 @@ struct pf_rule_gid { struct pf_rule_addr { struct pf_addr_wrap addr; u_int16_t port[2]; - u_int8_t not; + u_int8_t neg; u_int8_t port_op; }; @@ -445,6 +471,8 @@ union pf_rule_ptr { u_int32_t nr; }; +#define PF_ANCHOR_NAME_SIZE 64 + struct pf_rule { struct pf_rule_addr src; struct pf_rule_addr dst; @@ -464,12 +492,12 @@ struct pf_rule { char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; -#define PF_ANCHOR_NAME_SIZE 16 - char anchorname[PF_ANCHOR_NAME_SIZE]; #define PF_TAG_NAME_SIZE 16 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; + char overload_tblname[PF_TABLE_NAME_SIZE]; + TAILQ_ENTRY(pf_rule) entries; struct pf_pool rpool; @@ -479,6 +507,7 @@ struct pf_rule { struct pfi_kif *kif; struct pf_anchor *anchor; + struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; @@ -488,10 +517,16 @@ struct pf_rule { u_int32_t src_nodes; u_int32_t max_src_nodes; u_int32_t max_src_states; + u_int32_t max_src_conn; + struct { + u_int32_t limit; + u_int32_t seconds; + } max_src_conn_rate; u_int32_t qid; u_int32_t pqid; u_int32_t rt_listid; u_int32_t nr; + u_int32_t prob; u_int16_t return_icmp; u_int16_t return_icmp6; @@ -526,6 +561,12 @@ struct pf_rule { u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; + u_int8_t anchor_relative; + u_int8_t anchor_wildcard; + +#define PF_FLUSH 0x01 +#define PF_FLUSH_GLOBAL 0x02 + u_int8_t flush; }; /* rule flags */ @@ -551,6 +592,16 @@ struct pf_rule { #define PFSTATE_HIWAT 10000 /* default state table size */ + +struct pf_threshold { + u_int32_t limit; +#define PF_THRESHOLD_MULT 1000 +#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT + u_int32_t seconds; + u_int32_t count; + u_int32_t last; +}; + struct pf_src_node { RB_ENTRY(pf_src_node) entry; struct pf_addr addr; @@ -560,6 +611,8 @@ struct pf_src_node { u_int32_t bytes; u_int32_t packets; u_int32_t states; + u_int32_t conn; + struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; @@ -569,11 +622,19 @@ struct pf_src_node { #define PFSNODE_HIWAT 10000 /* default source node table size */ struct pf_state_scrub { + struct timeval pfss_last; /* time received last packet */ + u_int32_t pfss_tsecr; /* last echoed timestamp */ + u_int32_t pfss_tsval; /* largest timestamp */ + u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; -#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ - u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ +#define PFSS_PAWS 0x0010 /* stricter PAWS checks */ +#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */ +#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ +#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ + u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; - u_int32_t pfss_ts_mod; /* timestamp modulation */ + u_int32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_host { @@ -625,6 +686,7 @@ struct pf_state { u_int32_t packets[2]; u_int32_t bytes[2]; u_int32_t creatorid; + u_int16_t tag; sa_family_t af; u_int8_t proto; u_int8_t direction; @@ -634,6 +696,7 @@ struct pf_state { u_int8_t sync_flags; #define PFSTATE_NOSYNC 0x01 #define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 u_int8_t pad; }; @@ -642,9 +705,6 @@ TAILQ_HEAD(pf_rulequeue, pf_rule); struct pf_anchor; struct pf_ruleset { - TAILQ_ENTRY(pf_ruleset) entries; -#define PF_RULESET_NAME_SIZE 16 - char name[PF_RULESET_NAME_SIZE]; struct { struct pf_rulequeue queues[2]; struct { @@ -659,19 +719,22 @@ struct pf_ruleset { int topen; }; -TAILQ_HEAD(pf_rulesetqueue, pf_ruleset); - +RB_HEAD(pf_anchor_global, pf_anchor); +RB_HEAD(pf_anchor_node, pf_anchor); struct pf_anchor { - TAILQ_ENTRY(pf_anchor) entries; + RB_ENTRY(pf_anchor) entry_global; + RB_ENTRY(pf_anchor) entry_node; + struct pf_anchor *parent; + struct pf_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; - struct pf_rulesetqueue rulesets; - int tables; + char path[MAXPATHLEN]; + struct pf_ruleset ruleset; + int refcnt; /* anchor rules */ }; - -TAILQ_HEAD(pf_anchorqueue, pf_anchor); +RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PF_RESERVED_ANCHOR "_pf" -#define PF_INTERFACE_RULESET "_if" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 @@ -684,8 +747,7 @@ TAILQ_HEAD(pf_anchorqueue, pf_anchor); #define PFR_TFLAG_ALLMASK 0x0000003F struct pfr_table { - char pfrt_anchor[PF_ANCHOR_NAME_SIZE]; - char pfrt_ruleset[PF_RULESET_NAME_SIZE]; + char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; @@ -746,6 +808,7 @@ struct pfr_kentry { u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; + u_int8_t pfrke_intrpool; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); @@ -828,6 +891,8 @@ struct pfi_kif { #define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ #define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ #define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ +#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ struct pf_pdesc { u_int64_t tot_len; /* Make Mickey money */ @@ -845,11 +910,14 @@ struct pf_pdesc { struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; struct pf_addr *dst; + struct ether_header + *eh; u_int16_t *ip_sum; u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in * state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ +#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ sa_family_t af; u_int8_t proto; u_int8_t tos; @@ -866,7 +934,16 @@ struct pf_pdesc { #define PFRES_SHORT 3 /* Dropping short packet */ #define PFRES_NORM 4 /* Dropping by normalizer */ #define PFRES_MEMORY 5 /* Dropped due to lacking mem */ -#define PFRES_MAX 6 /* total+1 */ +#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */ +#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */ +#define PFRES_IPOPTIONS 8 /* IP option */ +#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */ +#define PFRES_BADSTATE 10 /* State mismatch */ +#define PFRES_STATEINS 11 /* State insertion failure */ +#define PFRES_MAXSTATES 12 /* State limit */ +#define PFRES_SRCLIMIT 13 /* Source node/conn limit */ +#define PFRES_SYNPROXY 14 /* SYN proxy */ +#define PFRES_MAX 15 /* total+1 */ #define PFRES_NAMES { \ "match", \ @@ -875,6 +952,36 @@ struct pf_pdesc { "short", \ "normalize", \ "memory", \ + "bad-timestamp", \ + "congestion", \ + "ip-option", \ + "proto-cksum", \ + "state-mismatch", \ + "state-insert", \ + "state-limit", \ + "src-limit", \ + "synproxy", \ + NULL \ +} + +/* Counters for other things we want to keep track of */ +#define LCNT_STATES 0 /* states */ +#define LCNT_SRCSTATES 1 /* max-src-states */ +#define LCNT_SRCNODES 2 /* max-src-nodes */ +#define LCNT_SRCCONN 3 /* max-src-conn */ +#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */ +#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */ +#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */ +#define LCNT_MAX 7 /* total+1 */ + +#define LCNT_NAMES { \ + "max states per rule", \ + "max-src-states", \ + "max-src-nodes", \ + "max-src-conn", \ + "max-src-conn-rate", \ + "overload table insertion", \ + "overload flush states", \ NULL \ } @@ -932,6 +1039,7 @@ struct pf_pdesc { struct pf_status { u_int64_t counters[PFRES_MAX]; + u_int64_t lcounters[LCNT_MAX]; /* limit counters */ u_int64_t fcounters[FCNT_MAX]; u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; @@ -1034,8 +1142,7 @@ struct pfioc_pooladdr { u_int8_t r_action; u_int8_t r_last; u_int8_t af; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; @@ -1044,8 +1151,8 @@ struct pfioc_rule { u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; + char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; @@ -1126,15 +1233,10 @@ struct pfioc_qstats { u_int8_t scheduler; }; -struct pfioc_anchor { - u_int32_t nr; - char name[PF_ANCHOR_NAME_SIZE]; -}; - struct pfioc_ruleset { u_int32_t nr; - char anchor[PF_ANCHOR_NAME_SIZE]; - char name[PF_RULESET_NAME_SIZE]; + char path[MAXPATHLEN]; + char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) @@ -1144,8 +1246,7 @@ struct pfioc_trans { int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; @@ -1202,9 +1303,7 @@ struct pfioc_iface { #define DIOCSTART _IO ('D', 1) #define DIOCSTOP _IO ('D', 2) -#define DIOCBEGINRULES _IOWR('D', 3, struct pfioc_rule) #define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) -#define DIOCCOMMITRULES _IOWR('D', 5, struct pfioc_rule) #define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) #define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) /* XXX cut 8 - 17 */ @@ -1227,9 +1326,7 @@ struct pfioc_iface { #define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) -#define DIOCBEGINALTQS _IOWR('D', 44, u_int32_t) #define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) -#define DIOCCOMMITALTQS _IOWR('D', 46, u_int32_t) #define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) #define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) #define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) @@ -1239,8 +1336,7 @@ struct pfioc_iface { #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) -#define DIOCGETANCHORS _IOWR('D', 56, struct pfioc_anchor) -#define DIOCGETANCHOR _IOWR('D', 57, struct pfioc_anchor) +/* XXX cut 55 - 57 */ #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) @@ -1258,8 +1354,6 @@ struct pfioc_iface { #define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) -#define DIOCRINABEGIN _IOWR('D', 75, struct pfioc_table) -#define DIOCRINACOMMIT _IOWR('D', 76, struct pfioc_table) #define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) #define DIOCOSFPFLUSH _IO('D', 78) #define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) @@ -1272,6 +1366,8 @@ struct pfioc_iface { #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) #define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #ifdef _KERNEL RB_HEAD(pf_src_tree, pf_src_node); @@ -1284,7 +1380,7 @@ RB_PROTOTYPE(pf_state_tree_id, pf_state, extern struct pf_state_tree_id tree_id; extern struct pf_state_queue state_updates; -extern struct pf_anchorqueue pf_anchors; +extern struct pf_anchor_global pf_anchors; extern struct pf_ruleset pf_main_ruleset; TAILQ_HEAD(pf_poolqueue, pf_pool); extern struct pf_poolqueue pf_pools[2]; @@ -1306,13 +1402,13 @@ extern int pf_tbladdr_setup(struct pf_ruleset *, extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); -extern void pf_update_anchor_rules(void); extern struct pool pf_src_tree_pl, pf_rule_pl; extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; extern void pf_purge_timeout(void *); extern void pf_purge_expired_src_nodes(void); extern void pf_purge_expired_states(void); +extern void pf_purge_expired_state(struct pf_state *); extern int pf_insert_state(struct pfi_kif *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, @@ -1322,11 +1418,11 @@ void pf_src_tree_remove_state(struct pf_state *); extern struct pf_state *pf_find_state_byid(struct pf_state *); extern struct pf_state *pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more); +extern void pf_print_state(struct pf_state *); +extern void pf_print_flags(u_int8_t); extern struct pf_anchor *pf_find_anchor(const char *); -extern struct pf_ruleset *pf_find_ruleset(char *, char *); -extern struct pf_ruleset *pf_find_or_create_ruleset( - char[PF_ANCHOR_NAME_SIZE], - char[PF_RULESET_NAME_SIZE]); +extern struct pf_ruleset *pf_find_ruleset(const char *); +extern struct pf_ruleset *pf_find_or_create_ruleset(const char *); extern void pf_remove_if_empty_ruleset( struct pf_ruleset *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, @@ -1340,11 +1436,11 @@ void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); #ifdef INET -int pf_test(int, struct ifnet *, struct mbuf **); +int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *); #endif /* INET */ #ifdef INET6 -int pf_test6(int, struct ifnet *, struct mbuf **); +int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); @@ -1363,20 +1459,23 @@ int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); void pf_normalize_init(void); -int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *); -int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *); +int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); +int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, - u_short *, struct tcphdr *, struct pf_state_peer *, - struct pf_state_peer *, int *); + u_short *, struct tcphdr *, struct pf_state *, + struct pf_state_peer *, struct pf_state_peer *, int *); u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); int pf_routable(struct pf_addr *addr, sa_family_t af); +int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1395,6 +1494,7 @@ int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); +int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, @@ -1430,6 +1530,8 @@ void pfi_dynaddr_remove(struct pf_addr_wrap *); void pfi_fill_oldstatus(struct pf_status *); int pfi_clr_istats(const char *, int *, int); int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_set_flags(const char *, int); +int pfi_clear_flags(const char *, int); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); @@ -1437,6 +1539,7 @@ extern struct pfi_statehead pfi_statehead; u_int16_t pf_tagname2tag(char *); void pf_tag2tagname(u_int16_t, char *); +void pf_tag_ref(u_int16_t); void pf_tag_unref(u_int16_t); int pf_tag_packet(struct mbuf *, struct pf_tag *, int); u_int32_t pf_qname2qid(char *); From 608d90c70d6c8580e38e1235b6b5bdf0ee5f7d3f Mon Sep 17 00:00:00 2001 From: mlaier Date: Thu, 8 Sep 2005 14:59:36 +0000 Subject: [PATCH 6/7] Loopback four fixes from OpenBSD for problems reported to the freebsd-pf mailing list onto the vendor branch: pf_ioctl.c Revision 1.153 Sun Aug 7 11:37:33 2005 UTC by dhartmei | verify ticket in DIOCADDADDR, from Boris Polevoy, ok deraadt@ pf_ioctl.c Revision 1.158 Mon Sep 5 14:51:08 2005 UTC by dhartmei | in DIOCCHANGERULE, properly initialize table, if used in NAT rule. | from Boris Polevoy , ok mcbride@ pf.c Revision 1.502 Mon Aug 22 11:54:25 2005 UTC by dhartmei | when nat'ing icmp 'connections', replace icmp id with proxy values | (similar to proxy ports for tcp/udp). not all clients use | per-invokation random ids, this allows multiple concurrent | connections from such clients. | thanks for testing to Rod Whitworth, "looks ok" markus@ pf.c Revision 1.501 Mon Aug 22 09:48:05 2005 UTC by dhartmei | fix rdr to bitmask replacement address pool. patch from Max Laier, | reported by Boris Polevoy, tested by Jean Debogue, ok henning@ --- sys/contrib/pf/net/pf.c | 81 ++++++++++++++++++++++++++--------- sys/contrib/pf/net/pf_ioctl.c | 7 +++ 2 files changed, 67 insertions(+), 21 deletions(-) diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index dc263d198af2..340b988f90df 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -2153,6 +2153,11 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) return (1); + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + do { key.af = af; key.proto = proto; @@ -2164,7 +2169,8 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, * port search; start random, step; * similar 2 portloop in in_pcbbind */ - if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP)) { key.gwy.port = dport; if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) return (0); @@ -2420,6 +2426,11 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, case PF_RDR: { if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) return (NULL); + if ((r->rpool.opts & PF_POOL_TYPEMASK) == + PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, + &r->rpool.cur->addr.v.a.mask, daddr, + pd->af); if (r->rpool.proxy_port[1]) { u_int32_t tmp_nport; @@ -3335,7 +3346,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; - u_int16_t icmpid; + u_int16_t icmpid, bport, nport = 0; sa_family_t af = pd->af; u_int8_t icmptype, icmpcode; int state_icmp = 0; @@ -3384,15 +3395,21 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { + bport = nport = icmpid; /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != + NULL) { PF_ACPY(&pd->baddr, saddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, nport, 0); + pd->hdr.icmp->icmp_id = nport; + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -3408,9 +3425,11 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, pd->nat_rule = nr; } } else { + bport = nport = icmpid; /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != + NULL) { PF_ACPY(&pd->baddr, daddr, af); switch (af) { #ifdef INET @@ -3560,24 +3579,28 @@ cleanup: s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = icmpid; + s->gwy.port = nport; PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = icmpid; - if (nr != NULL) + s->ext.port = 0; + if (nr != NULL) { PF_ACPY(&s->lan.addr, &pd->baddr, af); - else + s->lan.port = bport; + } else { PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = icmpid; + s->lan.port = s->gwy.port; + } } else { PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = icmpid; + s->lan.port = nport; PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = icmpid; - if (nr != NULL) + s->ext.port = 0; + if (nr != NULL) { PF_ACPY(&s->gwy.addr, &pd->baddr, af); - else + s->gwy.port = bport; + } else { PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = icmpid; + s->gwy.port = s->lan.port; + } } s->creation = time_second; s->expire = time_second; @@ -4506,13 +4529,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = icmpid; + key.ext.port = 0; key.gwy.port = icmpid; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = icmpid; - key.ext.port = icmpid; + key.ext.port = 0; } STATE_LOOKUP(); @@ -4521,7 +4544,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) { + if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) { switch (pd->af) { #ifdef INET @@ -4529,6 +4552,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&saddr->v4.s_addr, pd->ip_sum, (*state)->gwy.addr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->gwy.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->gwy.port; + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4549,6 +4580,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&daddr->v4.s_addr, pd->ip_sum, (*state)->lan.addr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->lan.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->lan.port; + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4875,13 +4914,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = iih.icmp_id; + key.ext.port = 0; key.gwy.port = iih.icmp_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp_id; - key.ext.port = iih.icmp_id; + key.ext.port = 0; } STATE_LOOKUP(); @@ -4927,13 +4966,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = iih.icmp6_id; + key.ext.port = 0; key.gwy.port = iih.icmp6_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp6_id; - key.ext.port = iih.icmp6_id; + key.ext.port = 0; } STATE_LOOKUP(); diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index f73c67b852fc..d4cb3c7b3ddd 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1454,6 +1454,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; + TAILQ_FOREACH(pa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &pa->addr)) + error = EINVAL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( @@ -2035,6 +2038,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + if (pp->ticket != ticket_pabuf) { + error = EBUSY; + break; + } #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; From dc6ae5fc92df6a060834e8dee2fee112a4586afc Mon Sep 17 00:00:00 2001 From: mlaier Date: Tue, 3 Jul 2007 12:06:01 +0000 Subject: [PATCH 7/7] Import pf from OpenBSD 4.1 --- sys/contrib/pf/net/if_pflog.c | 132 ++-- sys/contrib/pf/net/if_pflog.h | 16 +- sys/contrib/pf/net/if_pfsync.c | 522 ++++++++++--- sys/contrib/pf/net/if_pfsync.h | 69 +- sys/contrib/pf/net/pf.c | 1214 +++++++++++++++++++------------ sys/contrib/pf/net/pf_if.c | 709 ++++++++---------- sys/contrib/pf/net/pf_ioctl.c | 1000 +++++++++++++------------ sys/contrib/pf/net/pf_norm.c | 143 ++-- sys/contrib/pf/net/pf_osfp.c | 94 ++- sys/contrib/pf/net/pf_ruleset.c | 415 +++++++++++ sys/contrib/pf/net/pf_table.c | 10 +- sys/contrib/pf/net/pfvar.h | 363 +++++---- 12 files changed, 2985 insertions(+), 1702 deletions(-) create mode 100644 sys/contrib/pf/net/pf_ruleset.c diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c index 41e1e6564713..561a2f6f4b29 100644 --- a/sys/contrib/pf/net/if_pflog.c +++ b/sys/contrib/pf/net/if_pflog.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.c,v 1.12 2004/05/19 17:50:51 dhartmei Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -72,44 +73,90 @@ #define DPRINTF(x) #endif -struct pflog_softc pflogif[NPFLOG]; - void pflogattach(int); int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pflogioctl(struct ifnet *, u_long, caddr_t); -void pflogrtrequest(int, struct rtentry *, struct sockaddr *); void pflogstart(struct ifnet *); +int pflog_clone_create(struct if_clone *, int); +int pflog_clone_destroy(struct ifnet *); + +LIST_HEAD(, pflog_softc) pflogif_list; +struct if_clone pflog_cloner = + IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); + +struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ extern int ifqmaxlen; void pflogattach(int npflog) +{ + int i; + LIST_INIT(&pflogif_list); + for (i = 0; i < PFLOGIFS_MAX; i++) + pflogifs[i] = NULL; + (void) pflog_clone_create(&pflog_cloner, 0); + if_clone_attach(&pflog_cloner); +} + +int +pflog_clone_create(struct if_clone *ifc, int unit) { struct ifnet *ifp; - int i; + struct pflog_softc *pflogif; + int s; - bzero(pflogif, sizeof(pflogif)); + if (unit >= PFLOGIFS_MAX) + return (EINVAL); - for (i = 0; i < NPFLOG; i++) { - ifp = &pflogif[i].sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", i); - ifp->if_softc = &pflogif[i]; - ifp->if_mtu = PFLOGMTU; - ifp->if_ioctl = pflogioctl; - ifp->if_output = pflogoutput; - ifp->if_start = pflogstart; - ifp->if_type = IFT_PFLOG; - ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_hdrlen = PFLOG_HDRLEN; - if_attach(ifp); - if_alloc_sadl(ifp); + if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL) + return (ENOMEM); + bzero(pflogif, sizeof(*pflogif)); + + pflogif->sc_unit = unit; + ifp = &pflogif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit); + ifp->if_softc = pflogif; + ifp->if_mtu = PFLOGMTU; + ifp->if_ioctl = pflogioctl; + ifp->if_output = pflogoutput; + ifp->if_start = pflogstart; + ifp->if_type = IFT_PFLOG; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFLOG_HDRLEN; + if_attach(ifp); + if_alloc_sadl(ifp); #if NBPFILTER > 0 - bpfattach(&pflogif[i].sc_if.if_bpf, ifp, DLT_PFLOG, - PFLOG_HDRLEN); + bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN); #endif - } + + s = splnet(); + LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); + pflogifs[unit] = ifp; + splx(s); + + return (0); +} + +int +pflog_clone_destroy(struct ifnet *ifp) +{ + struct pflog_softc *pflogif = ifp->if_softc; + int s; + + s = splnet(); + pflogifs[pflogif->sc_unit] = NULL; + LIST_REMOVE(pflogif, sc_list); + splx(s); + +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pflogif, M_DEVBUF); + return (0); } /* @@ -122,7 +169,7 @@ pflogstart(struct ifnet *ifp) int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); @@ -142,14 +189,6 @@ pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, return (0); } -/* ARGSUSED */ -void -pflogrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) -{ - if (rt) - rt->rt_rmx.rmx_mtu = PFLOGMTU; -} - /* ARGSUSED */ int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) @@ -174,16 +213,18 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) int pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset) + struct pf_ruleset *ruleset, struct pf_pdesc *pd) { #if NBPFILTER > 0 struct ifnet *ifn; struct pfloghdr hdr; - struct mbuf m1; - if (kif == NULL || m == NULL || rm == NULL) + if (kif == NULL || m == NULL || rm == NULL || pd == NULL) return (-1); + if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + return (0); + bzero(&hdr, sizeof(hdr)); hdr.length = PFLOG_REAL_HDRLEN; hdr.af = af; @@ -201,6 +242,17 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) + pd->lookup.done = pf_socket_lookup(dir, pd); + if (pd->lookup.done > 0) { + hdr.uid = pd->lookup.uid; + hdr.pid = pd->lookup.pid; + } else { + hdr.uid = UID_MAX; + hdr.pid = NO_PID; + } + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; hdr.dir = dir; #ifdef INET @@ -213,14 +265,10 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, } #endif /* INET */ - m1.m_next = m; - m1.m_len = PFLOG_HDRLEN; - m1.m_data = (char *) &hdr; - - ifn = &(pflogif[0].sc_if); - - if (ifn->if_bpf) - bpf_mtap(ifn->if_bpf, &m1); + ifn->if_opackets++; + ifn->if_obytes += m->m_pkthdr.len; + bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, + BPF_DIRECTION_OUT); #endif return (0); diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index 7a43b10c215e..e9e0b01e5e5f 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.h,v 1.11 2004/05/19 17:50:51 dhartmei Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. @@ -27,8 +27,12 @@ #ifndef _NET_IF_PFLOG_H_ #define _NET_IF_PFLOG_H_ +#define PFLOGIFS_MAX 16 + struct pflog_softc { - struct ifnet sc_if; /* the interface */ + struct ifnet sc_if; /* the interface */ + int sc_unit; + LIST_ENTRY(pflog_softc) sc_list; }; #define PFLOG_RULESET_NAME_SIZE 16 @@ -42,6 +46,10 @@ struct pfloghdr { char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; @@ -64,9 +72,9 @@ struct old_pfloghdr { #ifdef _KERNEL #if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) #else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index d0f56d096a73..11063397e3e4 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -26,8 +26,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#include "bpfilter.h" -#include "pfsync.h" #include #include @@ -43,11 +41,12 @@ #include #include #include +#include +#include #include #include #ifdef INET -#include #include #include #include @@ -55,20 +54,20 @@ #endif #ifdef INET6 -#ifndef INET -#include -#endif #include #endif /* INET6 */ #include "carp.h" #if NCARP > 0 -extern int carp_suppress_preempt; +#include #endif #include #include +#include "bpfilter.h" +#include "pfsync.h" + #define PFSYNC_MINMTU \ (sizeof(struct pfsync_header) + sizeof(struct pf_state)) @@ -79,12 +78,17 @@ int pfsyncdebug; #define DPRINTF(x) #endif -struct pfsync_softc pfsyncif; -struct pfsyncstats pfsyncstats; +struct pfsync_softc *pfsyncif = NULL; +struct pfsyncstats pfsyncstats; void pfsyncattach(int); +int pfsync_clone_create(struct if_clone *, int); +int pfsync_clone_destroy(struct ifnet *); void pfsync_setmtu(struct pfsync_softc *, int); -int pfsync_insert_net_state(struct pfsync_state *); +int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); +void pfsync_update_net_tdb(struct pfsync_tdb *); int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pfsyncioctl(struct ifnet *, u_long, caddr_t); @@ -93,7 +97,10 @@ void pfsyncstart(struct ifnet *); struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); int pfsync_sendout(struct pfsync_softc *); +int pfsync_tdb_sendout(struct pfsync_softc *); +int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); void pfsync_timeout(void *); +void pfsync_tdb_timeout(void *); void pfsync_send_bus(struct pfsync_softc *, u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulkfail(void *); @@ -101,41 +108,77 @@ void pfsync_bulkfail(void *); int pfsync_sync_ok; extern int ifqmaxlen; +struct if_clone pfsync_cloner = + IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); + void pfsyncattach(int npfsync) +{ + if_clone_attach(&pfsync_cloner); +} +int +pfsync_clone_create(struct if_clone *ifc, int unit) { struct ifnet *ifp; + if (unit != 0) + return (EINVAL); + pfsync_sync_ok = 1; - bzero(&pfsyncif, sizeof(pfsyncif)); - pfsyncif.sc_mbuf = NULL; - pfsyncif.sc_mbuf_net = NULL; - pfsyncif.sc_statep.s = NULL; - pfsyncif.sc_statep_net.s = NULL; - pfsyncif.sc_maxupdates = 128; - pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_ureq_received = 0; - pfsyncif.sc_ureq_sent = 0; - ifp = &pfsyncif.sc_if; - strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname); - ifp->if_softc = &pfsyncif; + if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) + return (ENOMEM); + bzero(pfsyncif, sizeof(*pfsyncif)); + pfsyncif->sc_mbuf = NULL; + pfsyncif->sc_mbuf_net = NULL; + pfsyncif->sc_mbuf_tdb = NULL; + pfsyncif->sc_statep.s = NULL; + pfsyncif->sc_statep_net.s = NULL; + pfsyncif->sc_statep_tdb.t = NULL; + pfsyncif->sc_maxupdates = 128; + pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_ureq_received = 0; + pfsyncif->sc_ureq_sent = 0; + pfsyncif->sc_bulk_send_next = NULL; + pfsyncif->sc_bulk_terminator = NULL; + ifp = &pfsyncif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); + ifp->if_softc = pfsyncif; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFSYNC_HDRLEN; - pfsync_setmtu(&pfsyncif, MCLBYTES); - timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif); - timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif); - timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif); + pfsync_setmtu(pfsyncif, ETHERMTU); + timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); + timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); if_attach(ifp); if_alloc_sadl(ifp); -#if NBPFILTER > 0 - bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#if NCARP > 0 + if_addgroup(ifp, "carp"); #endif + +#if NBPFILTER > 0 + bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#endif + + return (0); +} + +int +pfsync_clone_destroy(struct ifnet *ifp) +{ +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pfsyncif, M_DEVBUF); + pfsyncif = NULL; + return (0); } /* @@ -148,7 +191,7 @@ pfsyncstart(struct ifnet *ifp) int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); @@ -161,7 +204,21 @@ pfsyncstart(struct ifnet *ifp) } int -pfsync_insert_net_state(struct pfsync_state *sp) +pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, + struct pf_state_peer *d) +{ + if (s->scrub.scrub_flag && d->scrub == NULL) { + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (d->scrub == NULL) + return (ENOMEM); + bzero(d->scrub, sizeof(*d->scrub)); + } + + return (0); +} + +int +pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) { struct pf_state *st = NULL; struct pf_rule *r = NULL; @@ -173,7 +230,7 @@ pfsync_insert_net_state(struct pfsync_state *sp) return (EINVAL); } - kif = pfi_lookup_create(sp->ifname); + kif = pfi_kif_get(sp->ifname); if (kif == NULL) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert_net_state: " @@ -183,19 +240,33 @@ pfsync_insert_net_state(struct pfsync_state *sp) } /* - * Just use the default rule until we have infrastructure to find the - * best matching rule. + * If the ruleset checksums match, it's safe to associate the state + * with the rule of that number. */ - r = &pf_default_rule; + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + r = pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; + else + r = &pf_default_rule; if (!r->max_states || r->states < r->max_states) st = pool_get(&pf_state_pl, PR_NOWAIT); if (st == NULL) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); return (ENOMEM); } bzero(st, sizeof(*st)); + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); + return (ENOMEM); + } + st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ @@ -225,11 +296,14 @@ pfsync_insert_net_state(struct pfsync_state *sp) st->creatorid = sp->creatorid; st->sync_flags = PFSTATE_FROMSYNC; - if (pf_insert_state(kif, st)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ r->states--; + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); return (EINVAL); } @@ -242,22 +316,25 @@ pfsync_input(struct mbuf *m, ...) { struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; - struct pfsync_softc *sc = &pfsyncif; - struct pf_state *st, key; + struct pfsync_softc *sc = pfsyncif; + struct pf_state *st; + struct pf_state_cmp key; struct pfsync_state *sp; struct pfsync_state_upd *up; struct pfsync_state_del *dp; struct pfsync_state_clr *cp; struct pfsync_state_upd_req *rup; struct pfsync_state_bus *bus; + struct pfsync_tdb *pt; struct in_addr src; struct mbuf *mp; int iplen, action, error, i, s, count, offp, sfail, stale = 0; + u_int8_t chksum_flag = 0; pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc->sc_sync_ifp || !pf_status.running) + if (!sc || !sc->sc_sync_ifp || !pf_status.running) goto done; /* verify that the packet came in on the right interface */ @@ -306,6 +383,9 @@ pfsync_input(struct mbuf *m, ...) /* Cheaper to grab this now than having to mess with mbufs later */ src = ip->ip_src; + if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) + chksum_flag++; + switch (action) { case PFSYNC_ACT_CLR: { struct pf_state *nexts; @@ -323,28 +403,24 @@ pfsync_input(struct mbuf *m, ...) if (cp->ifname[0] == '\0') { for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); + nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } else { - kif = pfi_lookup_if(cp->ifname); - if (kif == NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_input: PFSYNC_ACT_CLR " - "bad interface: %s\n", cp->ifname); + if ((kif = pfi_kif_get(cp->ifname)) == NULL) { splx(s); - goto done; + return; } for (st = RB_MIN(pf_state_tree_lan_ext, &kif->pfik_lan_ext); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } @@ -375,7 +451,8 @@ pfsync_input(struct mbuf *m, ...) continue; } - if ((error = pfsync_insert_net_state(sp))) { + if ((error = pfsync_insert_net_state(sp, + chksum_flag))) { if (error == ENOMEM) { splx(s); goto done; @@ -414,7 +491,7 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&key); if (st == NULL) { /* insert the update */ - if (pfsync_insert_net_state(sp)) + if (pfsync_insert_net_state(sp, chksum_flag)) pfsyncstats.pfsyncs_badstate++; continue; } @@ -453,7 +530,7 @@ pfsync_input(struct mbuf *m, ...) */ if (st->src.state > sp->src.state) sfail = 5; - else if ( st->dst.state > sp->dst.state) + else if (st->dst.state > sp->dst.state) sfail = 6; } if (sfail) { @@ -478,6 +555,7 @@ pfsync_input(struct mbuf *m, ...) } continue; } + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = ntohl(sp->expire) + time_second; @@ -509,9 +587,8 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; @@ -604,6 +681,7 @@ pfsync_input(struct mbuf *m, ...) PFSYNC_FLAG_STALE); continue; } + pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = ntohl(up->expire) + time_second; @@ -632,9 +710,8 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; @@ -660,6 +737,10 @@ pfsync_input(struct mbuf *m, ...) if (key.id == 0 && key.creatorid == 0) { sc->sc_ureq_received = time_uptime; + if (sc->sc_bulk_send_next == NULL) + sc->sc_bulk_send_next = + TAILQ_FIRST(&state_list); + sc->sc_bulk_terminator = sc->sc_bulk_send_next; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received " "bulk update request\n"); @@ -709,7 +790,7 @@ pfsync_input(struct mbuf *m, ...) timeout_del(&sc->sc_bulkfail_tmo); #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) @@ -723,6 +804,18 @@ pfsync_input(struct mbuf *m, ...) break; } break; + case PFSYNC_ACT_TDB_UPD: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*pt), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + s = splsoftnet(); + for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); + i < count; i++, pt++) + pfsync_update_net_tdb(pt); + splx(s); + break; } done: @@ -862,7 +955,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_ureq_sent = time_uptime; #if NCARP > 0 if (pfsync_sync_ok) - carp_suppress_preempt++; + carp_group_demote_adj(&sc->sc_if, 1); #endif pfsync_sync_ok = 0; if (pf_status.debug >= PF_DEBUG_MISC) @@ -938,6 +1031,10 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) len = sizeof(struct pfsync_header) + sizeof(struct pfsync_state_bus); break; + case PFSYNC_ACT_TDB_UPD: + len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + + sizeof(struct pfsync_header); + break; default: len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + sizeof(struct pfsync_header); @@ -962,17 +1059,23 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) h->af = 0; h->count = 0; h->action = action; + if (action != PFSYNC_ACT_TDB_UPD) + bcopy(&pf_status.pf_chksum, &h->pf_chksum, + PF_MD5_DIGEST_LENGTH); *sp = (void *)((char *)h + PFSYNC_HDRLEN); - timeout_add(&sc->sc_tmo, hz); + if (action == PFSYNC_ACT_TDB_UPD) + timeout_add(&sc->sc_tdb_tmo, hz); + else + timeout_add(&sc->sc_tmo, hz); return (m); } int pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_header *h, *h_net; struct pfsync_state *sp = NULL; struct pfsync_state_upd *up = NULL; @@ -982,6 +1085,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) int s, ret = 0; u_int8_t i = 255, newaction = 0; + if (sc == NULL) + return (0); + ifp = &sc->sc_if; + /* * If a packet falls in the forest and there's nobody around to * hear, does it make a sound? @@ -1044,8 +1151,6 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) secs = time_second; st->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); if (sp == NULL) { /* not a "duplicate" update */ @@ -1067,10 +1172,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(secs - st->creation); - sp->packets[0] = htonl(st->packets[0]); - sp->packets[1] = htonl(st->packets[1]); - sp->bytes[0] = htonl(st->bytes[0]); - sp->bytes[1] = htonl(st->bytes[1]); + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); if ((r = st->rule.ptr) == NULL) sp->rule = htonl(-1); else @@ -1169,12 +1274,16 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) int pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) { - struct ifnet *ifp = &pfsyncif.sc_if; + struct ifnet *ifp = NULL; struct pfsync_header *h; - struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_upd_req *rup; int ret = 0; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, (void *)&sc->sc_statep.s)) == NULL) @@ -1211,11 +1320,15 @@ pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) int pfsync_clear_states(u_int32_t creatorid, char *ifname) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_clr *cp; int s, ret; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); @@ -1246,6 +1359,17 @@ pfsync_timeout(void *v) splx(s); } +void +pfsync_tdb_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_tdb_sendout(sc); + splx(s); +} + /* This must be called in splnet() */ void pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) @@ -1282,28 +1406,39 @@ pfsync_bulk_update(void *v) * Grab at most PFSYNC_BULKPACKETS worth of states which have not * been sent since the latest request was made. */ - while ((state = TAILQ_FIRST(&state_updates)) != NULL && - ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { - if (state->pfsync_time > sc->sc_ureq_received) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - timeout_del(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: bulk update complete\n"); - break; - } else { - /* send an update and move to end of list */ - if (!state->sync_flags) + state = sc->sc_bulk_send_next; + if (state) + do { + /* send state update if syncable and not already sent */ + if (!state->sync_flags + && state->timeout < PFTM_MAX + && state->pfsync_time <= sc->sc_ureq_received) { pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - state->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, state, - u.s.entry_updates); + i++; + } - /* look again for more in a bit */ - timeout_add(&sc->sc_bulk_tmo, 1); - } + /* figure next state to send */ + state = TAILQ_NEXT(state, u.s.entry_list); + + /* wrap to start of list if we hit the end */ + if (!state) + state = TAILQ_FIRST(&state_list); + } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && + state != sc->sc_bulk_terminator); + + if (!state || state == sc->sc_bulk_terminator) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + sc->sc_bulk_send_next = NULL; + sc->sc_bulk_terminator = NULL; + timeout_del(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + } else { + /* look again for more in a bit */ + timeout_add(&sc->sc_bulk_tmo, 1); + sc->sc_bulk_send_next = state; } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); @@ -1334,7 +1469,7 @@ pfsync_bulkfail(void *v) sc->sc_bulk_tries = 0; #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) @@ -1346,10 +1481,11 @@ pfsync_bulkfail(void *v) /* This must be called in splnet() */ int -pfsync_sendout(sc) - struct pfsync_softc *sc; +pfsync_sendout(struct pfsync_softc *sc) { +#if NBPFILTER > 0 struct ifnet *ifp = &sc->sc_if; +#endif struct mbuf *m; timeout_del(&sc->sc_tmo); @@ -1362,7 +1498,7 @@ pfsync_sendout(sc) #if NBPFILTER > 0 if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m); + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif if (sc->sc_mbuf_net) { @@ -1372,10 +1508,41 @@ pfsync_sendout(sc) sc->sc_statep_net.s = NULL; } - if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { - struct ip *ip; - struct sockaddr sa; + return pfsync_sendout_mbuf(sc, m); +} +int +pfsync_tdb_sendout(struct pfsync_softc *sc) +{ +#if NBPFILTER > 0 + struct ifnet *ifp = &sc->sc_if; +#endif + struct mbuf *m; + + timeout_del(&sc->sc_tdb_tmo); + + if (sc->sc_mbuf_tdb == NULL) + return (0); + m = sc->sc_mbuf_tdb; + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + return pfsync_sendout_mbuf(sc, m); +} + +int +pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +{ + struct sockaddr sa; + struct ip *ip; + + if (sc->sc_sync_ifp || + sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (m == NULL) { pfsyncstats.pfsyncs_onomem++; @@ -1409,3 +1576,154 @@ pfsync_sendout(sc) return (0); } + +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. */ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; + } + splx(s); + return; + + bad: + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + return; +} + +/* One of our local tdbs have been updated, need to sync rpl with others */ +int +pfsync_update_tdb(struct tdb *tdb, int output) +{ + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; + struct pfsync_header *h; + struct pfsync_tdb *pt = NULL; + int s, i, ret; + + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + /* Don't leave any stale pfsync packets hanging around. */ + if (sc->sc_mbuf_tdb != NULL) { + m_freem(sc->sc_mbuf_tdb); + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + } + return (0); + } + + s = splnet(); + if (sc->sc_mbuf_tdb == NULL) { + if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, + (void *)&sc->sc_statep_tdb.t)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + if (h->action != PFSYNC_ACT_TDB_UPD) { + /* + * XXX will never happen as long as there's + * only one "TDB action". + */ + pfsync_tdb_sendout(sc); + sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, + PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); + if (sc->sc_mbuf_tdb == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else if (sc->sc_maxupdates) { + /* + * If it's an update, look in the packet to see if + * we already have an update for the state. + */ + struct pfsync_tdb *u = + (void *)((char *)h + PFSYNC_HDRLEN); + + for (i = 0; !pt && i < h->count; i++) { + if (tdb->tdb_spi == u->spi && + tdb->tdb_sproto == u->sproto && + !bcmp(&tdb->tdb_dst, &u->dst, + SA_LEN(&u->dst.sa))) { + pt = u; + pt->updates++; + } + u++; + } + } + } + + if (pt == NULL) { + /* not a "duplicate" update */ + pt = sc->sc_statep_tdb.t++; + sc->sc_mbuf_tdb->m_pkthdr.len = + sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); + h->count++; + bzero(pt, sizeof(*pt)); + + pt->spi = tdb->tdb_spi; + memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); + pt->sproto = tdb->tdb_sproto; + } + + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. + * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); + pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); + + if (h->count == sc->sc_maxcount || + (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) + ret = pfsync_tdb_sendout(sc); + + splx(s); + return (ret); +} diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index ddd049a81fda..5ed465e716a2 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.19 2005/01/20 17:47:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -35,6 +35,7 @@ struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; @@ -54,8 +55,7 @@ struct pfsync_state_peer { u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int8_t scrub_flag; - u_int8_t pad[5]; + u_int8_t pad[6]; } __packed; struct pfsync_state { @@ -72,8 +72,8 @@ struct pfsync_state { u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; - u_int32_t packets[2]; - u_int32_t bytes[2]; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; @@ -88,6 +88,16 @@ struct pfsync_state { #define PFSYNC_FLAG_COMPRESS 0x01 #define PFSYNC_FLAG_STALE 0x02 +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t pad[2]; +} __packed; + struct pfsync_state_upd { u_int32_t id[2]; struct pfsync_state_peer src; @@ -143,6 +153,10 @@ union sc_statep { struct pfsync_state_upd_req *r; }; +union sc_tdb_statep { + struct pfsync_tdb *t; +}; + extern int pfsync_sync_ok; struct pfsync_softc { @@ -151,26 +165,33 @@ struct pfsync_softc { struct ip_moptions sc_imo; struct timeout sc_tmo; + struct timeout sc_tdb_tmo; struct timeout sc_bulk_tmo; struct timeout sc_bulkfail_tmo; struct in_addr sc_sync_peer; struct in_addr sc_sendaddr; struct mbuf *sc_mbuf; /* current cumulative mbuf */ struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */ union sc_statep sc_statep; union sc_statep sc_statep_net; + union sc_tdb_statep sc_statep_tdb; u_int32_t sc_ureq_received; u_int32_t sc_ureq_sent; + struct pf_state *sc_bulk_send_next; + struct pf_state *sc_bulk_terminator; int sc_bulk_tries; int sc_maxcount; /* number of states in mtu */ int sc_maxupdates; /* number of updates/state */ }; + +extern struct pfsync_softc *pfsyncif; #endif struct pfsync_header { u_int8_t version; -#define PFSYNC_VERSION 2 +#define PFSYNC_VERSION 3 u_int8_t af; u_int8_t action; #define PFSYNC_ACT_CLR 0 /* clear all states */ @@ -183,8 +204,10 @@ struct pfsync_header { #define PFSYNC_ACT_DEL_F 7 /* delete fragments */ #define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ #define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_MAX 10 +#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ +#define PFSYNC_ACT_MAX 11 u_int8_t count; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; } __packed; #define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ @@ -193,7 +216,7 @@ struct pfsync_header { #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT" + "UPD REQ", "BLK UPD STAT", "TDB UPD" #define PFSYNC_DFLTTL 255 @@ -236,6 +259,13 @@ struct pfsyncreq { (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ @@ -246,6 +276,13 @@ struct pfsyncreq { (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ } while (0) #define pf_state_host_hton(s,d) do { \ @@ -258,6 +295,17 @@ struct pfsyncreq { (d)->port = (s)->port; \ } while (0) +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) + #ifdef _KERNEL void pfsync_input(struct mbuf *, ...); int pfsync_clear_states(u_int32_t, char *); @@ -267,7 +315,8 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); (st->proto == IPPROTO_PFSYNC)) \ st->sync_flags |= PFSTATE_NOSYNC; \ else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_INS, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_update_state(st) do { \ @@ -280,8 +329,8 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); if (!st->sync_flags) \ pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) +int pfsync_update_tdb(struct tdb *, int); #endif #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 340b988f90df..793dc3415020 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */ +/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -48,11 +48,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include @@ -92,8 +95,6 @@ * Global variables */ -struct pf_anchor_global pf_anchors; -struct pf_ruleset pf_main_ruleset; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; @@ -105,8 +106,6 @@ u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; -struct timeout pf_expire_to; /* expire timeout */ - struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; @@ -127,6 +126,8 @@ int pf_check_threshold(struct pf_threshold *); void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); +int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); #ifdef INET6 void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); @@ -139,7 +140,7 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - struct ether_header *, struct ifnet *); + u_int16_t, struct ether_header *, struct ifnet *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, @@ -182,9 +183,11 @@ int pf_test_state_icmp(struct pf_state **, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct pf_pdesc *); -struct pf_tag *pf_get_tag(struct mbuf *); int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_tag **, int *); + struct pf_mtag *, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); int pf_map_addr(u_int8_t, struct pf_rule *, @@ -195,11 +198,12 @@ int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); -int pf_socket_lookup(uid_t *, gid_t *, - int, struct pf_pdesc *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +int pf_socket_lookup(int, struct pf_pdesc *); u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -212,16 +216,20 @@ int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -static int pf_add_mbuf_tag(struct mbuf *, u_int); struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state *, u_int8_t); + struct pf_state_cmp *, u_int8_t); int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); +extern struct pool pfr_ktable_pl; +extern struct pool pfr_kentry_pl; + struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_src_tree_pl, PFSNODE_HIWAT }, - { &pf_frent_pl, PFFRAG_FRENT_HIWAT } + { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, + { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, + { &pfr_kentry_pl, PFR_KENTRY_HIWAT } }; #define STATE_LOOKUP() \ @@ -252,9 +260,8 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ (s)->lan.port != (s)->gwy.port -#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \ - ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ - (k)->pfik_parent->pfik_parent) +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all #define STATE_INC_COUNTERS(s) \ do { \ @@ -281,12 +288,11 @@ static __inline int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); -static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; -struct pf_state_queue state_updates; +struct pf_state_queue state_list; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); RB_GENERATE(pf_state_tree_lan_ext, pf_state, @@ -295,8 +301,6 @@ RB_GENERATE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); RB_GENERATE(pf_state_tree_id, pf_state, u.s.entry_id, pf_state_compare_id); -RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); -RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) @@ -493,14 +497,6 @@ pf_state_compare_id(struct pf_state *a, struct pf_state *b) return (0); } -static __inline int -pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) -{ - int c = strcmp(a->path, b->path); - - return (c ? (c < 0 ? -1 : 1) : 0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -522,14 +518,14 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) #endif /* INET6 */ struct pf_state * -pf_find_state_byid(struct pf_state *key) +pf_find_state_byid(struct pf_state_cmp *key) { pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, key)); + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); } struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) +pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) { struct pf_state *s; @@ -537,20 +533,20 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) switch (tree) { case PF_LAN_EXT: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); case PF_EXT_GWY: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); default: panic("pf_find_state_recurse"); @@ -558,7 +554,7 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) } struct pf_state * -pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) +pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) { struct pf_state *s, *ss = NULL; struct pfi_kif *kif; @@ -569,7 +565,7 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_LAN_EXT: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); + &kif->pfik_lan_ext, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -581,7 +577,7 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_EXT_GWY: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); + &kif->pfik_ext_gwy, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -632,6 +628,7 @@ pf_src_connlimit(struct pf_state **state) int bad = 0; (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && @@ -845,11 +842,10 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); return (-1); } - TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); - + TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; - pfi_attach_state(kif); + pfi_kif_ref(kif, PFI_KIF_REF_STATE); #if NPFSYNC pfsync_insert_state(state); #endif @@ -857,18 +853,28 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) } void -pf_purge_timeout(void *arg) +pf_purge_thread(void *v) { - struct timeout *to = arg; - int s; + int nloops = 0, s; - s = splsoftnet(); - pf_purge_expired_states(); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - splx(s); + for (;;) { + tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); - timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz); + s = splsoftnet(); + + /* process a fraction of the state table every second */ + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); + + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(0); + nloops = 0; + } + + splx(s); + } } u_int32_t @@ -884,6 +890,7 @@ pf_state_expires(const struct pf_state *state) return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); + KASSERT(state->timeout != PFTM_UNLINKED); KASSERT(state->timeout < PFTM_MAX); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) @@ -908,14 +915,21 @@ pf_state_expires(const struct pf_state *state) } void -pf_purge_expired_src_nodes(void) +pf_purge_expired_src_nodes(int waslocked) { struct pf_src_node *cur, *next; + int locked = waslocked; for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); if (cur->states <= 0 && cur->expire <= time_second) { + if (! locked) { + rw_enter_write(&pf_consistency_lock); + next = RB_NEXT(pf_src_tree, + &tree_src_tracking, cur); + locked = 1; + } if (cur->rule.ptr != NULL) { cur->rule.ptr->src_nodes--; if (cur->rule.ptr->states <= 0 && @@ -928,6 +942,9 @@ pf_purge_expired_src_nodes(void) pool_put(&pf_src_tree_pl, cur); } } + + if (locked && !waslocked) + rw_exit_write(&pf_consistency_lock); } void @@ -937,8 +954,7 @@ pf_src_tree_remove_state(struct pf_state *s) if (s->src_node != NULL) { if (s->proto == IPPROTO_TCP) { - if (s->src.state == PF_TCPS_PROXY_DST || - s->timeout >= PFTM_TCP_ESTABLISHED) + if (s->src.tcp_est) --s->src_node->conn; } if (--s->src_node->states <= 0) { @@ -961,24 +977,42 @@ pf_src_tree_remove_state(struct pf_state *s) s->src_node = s->nat_src_node = NULL; } +/* callers should be at splsoftnet */ void -pf_purge_expired_state(struct pf_state *cur) +pf_unlink_state(struct pf_state *cur) { - if (cur->src.state == PF_TCPS_PROXY_DST) + if (cur->src.state == PF_TCPS_PROXY_DST) { pf_send_tcp(cur->rule.ptr, cur->af, &cur->ext.addr, &cur->lan.addr, cur->ext.port, cur->lan.port, cur->src.seqhi, cur->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, NULL, NULL); + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } RB_REMOVE(pf_state_tree_ext_gwy, &cur->u.s.kif->pfik_ext_gwy, cur); RB_REMOVE(pf_state_tree_lan_ext, &cur->u.s.kif->pfik_lan_ext, cur); RB_REMOVE(pf_state_tree_id, &tree_id, cur); #if NPFSYNC - pfsync_delete_state(cur); + if (cur->creatorid == pf_status.hostid) + pfsync_delete_state(cur); #endif + cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); +} + +/* callers should be at splsoftnet and hold the + * write_lock on pf_consistency_lock */ +void +pf_free_state(struct pf_state *cur) +{ +#if NPFSYNC + if (pfsyncif != NULL && + (pfsyncif->sc_bulk_send_next == cur || + pfsyncif->sc_bulk_terminator == cur)) + return; +#endif + KASSERT(cur->timeout == PFTM_UNLINKED); if (--cur->rule.ptr->states <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); @@ -990,8 +1024,8 @@ pf_purge_expired_state(struct pf_state *cur) if (--cur->anchor.ptr->states <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); - pfi_detach_state(cur->u.s.kif); - TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); + pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); + TAILQ_REMOVE(&state_list, cur, u.s.entry_list); if (cur->tag) pf_tag_unref(cur->tag); pool_put(&pf_state_pl, cur); @@ -1000,16 +1034,44 @@ pf_purge_expired_state(struct pf_state *cur) } void -pf_purge_expired_states(void) +pf_purge_expired_states(u_int32_t maxcheck) { - struct pf_state *cur, *next; + static struct pf_state *cur = NULL; + struct pf_state *next; + int locked = 0; - for (cur = RB_MIN(pf_state_tree_id, &tree_id); - cur; cur = next) { - next = RB_NEXT(pf_state_tree_id, &tree_id, cur); - if (pf_state_expires(cur) <= time_second) - pf_purge_expired_state(cur); + while (maxcheck--) { + /* wrap to start of list when we hit the end */ + if (cur == NULL) { + cur = TAILQ_FIRST(&state_list); + if (cur == NULL) + break; /* list empty */ + } + + /* get next state, as cur may get deleted */ + next = TAILQ_NEXT(cur, u.s.entry_list); + + if (cur->timeout == PFTM_UNLINKED) { + /* free unlinked state */ + if (! locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } else if (pf_state_expires(cur) <= time_second) { + /* unlink and free expired state */ + pf_unlink_state(cur); + if (! locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } + cur = next; } + + if (locked) + rw_exit_write(&pf_consistency_lock); } int @@ -1235,9 +1297,12 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); + case PF_ADDR_RTLABEL: + return (aw1->v.rtlabel != aw2->v.rtlabel); default: printf("invalid address type: %d\n", aw1->type); return (1); @@ -1424,12 +1489,70 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, } } + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_a(&sack.start, &th->th_sum, + htonl(ntohl(sack.start) - + dst->seqdiff), 0); + pf_change_a(&sack.end, &th->th_sum, + htonl(ntohl(sack.end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, opts); + return (copyback); +} + void pf_send_tcp(const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - struct ether_header *eh, struct ifnet *ifp) + u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) { struct mbuf *m; int len, tlen; @@ -1440,7 +1563,8 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, struct ip6_hdr *h6; #endif /* INET6 */ struct tcphdr *th; - char *opt; + char *opt; + struct pf_mtag *pf_mtag; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -1464,30 +1588,24 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return; - if (tag) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - return; - } - m_tag_prepend(m, mtag); + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + return; } + if (tag) + pf_mtag->flags |= PF_TAG_GENERATED; + + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; + #ifdef ALTQ if (r != NULL && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m, struct ip *); - m_tag_prepend(m, mtag); - } + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -1597,39 +1715,31 @@ void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct m_tag *mtag; + struct pf_mtag *pf_mtag; struct mbuf *m0; - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - return; m0 = m_copy(m, 0, M_COPYALL); - if (m0 == NULL) { - m_tag_free(mtag); + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; - } - m_tag_prepend(m0, mtag); + pf_mtag->flags |= PF_TAG_GENERATED; + + if (r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; #ifdef ALTQ if (r->qid) { - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m0, struct ip *); - m_tag_prepend(m0, mtag); - } + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: - icmp_error(m0, type, code, 0, (void *)NULL); + icmp_error(m0, type, code, 0, 0); break; #endif /* INET */ #ifdef INET6 @@ -1737,58 +1847,71 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (pf_match(op, a1, a2, g)); } -struct pf_tag * -pf_get_tag(struct mbuf *m) +struct pf_mtag * +pf_find_mtag(struct mbuf *m) { struct m_tag *mtag; - if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL) - return ((struct pf_tag *)(mtag + 1)); - else + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); +} + +struct pf_mtag * +pf_get_mtag(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); + } + + return ((struct pf_mtag *)(mtag + 1)); } int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_tag **pftag, int *tag) +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, + int *tag) { - if (*tag == -1) { /* find mbuf tag */ - *pftag = pf_get_tag(m); - if (*pftag != NULL) - *tag = (*pftag)->tag; - else - *tag = 0; - } + if (*tag == -1) + *tag = pf_mtag->tag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag) +pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) { - struct m_tag *mtag; - - if (tag <= 0) + if (tag <= 0 && rtableid < 0) return (0); - if (pftag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT); - if (mtag == NULL) + if (pf_mtag == NULL) + if ((pf_mtag = pf_get_mtag(m)) == NULL) return (1); - ((struct pf_tag *)(mtag + 1))->tag = tag; - m_tag_prepend(m, mtag); - } else - pftag->tag = tag; + if (tag > 0) + pf_mtag->tag = tag; + if (rtableid >= 0) + pf_mtag->rtableid = rtableid; return (0); } static void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + (*r)->anchor->match = 0; + if (match) + *match = 0; if (*depth >= sizeof(pf_anchor_stack) / sizeof(pf_anchor_stack[0])) { printf("pf_step_into_anchor: stack overflow\n"); @@ -1815,17 +1938,23 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } -static void +int pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + int quick = 0; do { if (*depth <= 0) break; f = pf_anchor_stack + *depth - 1; if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; @@ -1840,8 +1969,12 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); + + return (quick); } #ifdef INET6 @@ -2145,7 +2278,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, struct pf_src_node **sn) { - struct pf_state key; + struct pf_state_cmp key; struct pf_addr init_addr; u_int16_t cut; @@ -2237,8 +2370,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, { struct pf_rule *r, *rm = NULL; struct pf_ruleset *ruleset = NULL; - struct pf_tag *pftag = NULL; int tag = -1; + int rtableid = -1; int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); @@ -2256,8 +2389,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2265,7 +2397,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->neg)) + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, @@ -2273,15 +2406,16 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL)) r = TAILQ_NEXT(r, entries); else if (dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, @@ -2290,15 +2424,19 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { rm = r; } else - pf_step_into_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); } if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); } - if (pf_tag_packet(m, pftag, tag)) + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) return (NULL); if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) @@ -2458,28 +2596,35 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, } int -pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) +pf_socket_lookup(int direction, struct pf_pdesc *pd) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbtable *tb; struct inpcb *inp; - *uid = UID_MAX; - *gid = GID_MAX; + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; switch (pd->proto) { case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; tb = &tcbtable; break; case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; tb = &udbtable; break; default: - return (0); + return (-1); } if (direction == PF_IN) { saddr = pd->src; @@ -2500,7 +2645,7 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) if (inp == NULL) { inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET */ @@ -2511,16 +2656,17 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) if (inp == NULL) { inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET6 */ default: - return (0); + return (-1); } - *uid = inp->inp_socket->so_euid; - *gid = inp->inp_socket->so_egid; + pd->lookup.uid = inp->inp_socket->so_euid; + pd->lookup.gid = inp->inp_socket->so_egid; + pd->lookup.pid = inp->inp_socket->so_cpid; return (1); } @@ -2691,18 +2837,15 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct tcphdr *th = pd->hdr.tcp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; u_int16_t mss = tcp_mssdflt; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -2743,8 +2886,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2752,35 +2894,37 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_TCP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if ((r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) @@ -2788,7 +2932,10 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -2797,11 +2944,11 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -2809,10 +2956,11 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*th), th); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -2843,7 +2991,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, pd->eh, kif->pfik_ifp); + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } else if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); @@ -2855,7 +3003,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -2875,7 +3023,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -2915,7 +3063,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_TCP; s->direction = direction; s->af = af; @@ -2950,7 +3100,8 @@ cleanup: if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ - while ((s->src.seqdiff = htonl(arc4random())) == 0) + while ((s->src.seqdiff = + tcp_rndiss_next() - s->src.seqlo) == 0) ; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); @@ -3043,7 +3194,7 @@ cleanup: s->src.mss = mss; pf_send_tcp(r, af, daddr, saddr, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, NULL, NULL); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -3067,17 +3218,14 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct udphdr *uh = pd->hdr.udp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3118,8 +3266,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3127,40 +3274,45 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_UDP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], uh->uh_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], uh->uh_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3169,11 +3321,11 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3181,10 +3333,11 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*uh), uh); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -3213,7 +3366,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3229,7 +3382,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3269,7 +3422,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_UDP; s->direction = direction; s->af = af; @@ -3350,12 +3505,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, sa_family_t af = pd->af; u_int8_t icmptype, icmpcode; int state_icmp = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; #ifdef INET6 int rewrite = 0; #endif /* INET6 */ int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3454,8 +3609,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3463,28 +3617,33 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3493,11 +3652,11 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3505,19 +3664,20 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { #ifdef INET6 if (rewrite) m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); #endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3533,7 +3693,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3573,7 +3733,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; @@ -3651,9 +3813,9 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_addr *saddr = pd->src, *daddr = pd->dst; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3710,8 +3872,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3719,24 +3880,29 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3745,11 +3911,11 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3757,8 +3923,9 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + if (r->log || (nr != NULL && nr->natpass && nr->log)) + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNICMP) || @@ -3797,7 +3964,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3813,7 +3980,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3853,7 +4020,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; @@ -3913,15 +4082,14 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, struct pf_ruleset *ruleset = NULL; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; int tag = -1; int asd = 0; + int match = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3929,11 +4097,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->src.port_op || r->dst.port_op || r->flagset || r->type || r->code || @@ -3941,10 +4111,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3953,11 +4124,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3966,12 +4137,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, + pd); if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3984,7 +4156,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state key; + struct pf_state_cmp key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; @@ -4031,7 +4203,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - NULL, NULL); + 0, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || @@ -4069,7 +4241,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, NULL, NULL); + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != @@ -4084,20 +4256,20 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, - NULL, NULL); + (*state)->tag, NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, - NULL, NULL); + 0, NULL, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + - (*state)->src.max_win; - (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; @@ -4132,7 +4304,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { - while ((src->seqdiff = htonl(arc4random())) == 0) + while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + @@ -4221,6 +4393,25 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, ackskew = dst->seqlo - ack; + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. + */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + copyback = 1; + } + + #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ @@ -4231,8 +4422,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (pd->flags & PFDESC_IP_REAS) == 0)) { - /* Require an exact sequence match on resets when possible */ + (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, @@ -4278,8 +4469,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; - else if (src->state >= TCPS_FIN_WAIT_2 || - dst->state >= TCPS_FIN_WAIT_2) + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) @@ -4325,9 +4516,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", - seq, ack, pd->p_len, ackskew, - (*state)->packets[0], (*state)->packets[1]); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + ackskew, (*state)->packets[0], + (*state)->packets[1]); } if (dst->scrub || src->scrub) { @@ -4368,7 +4560,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, - (*state)->rule.ptr->return_ttl, 1, + (*state)->rule.ptr->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; @@ -4377,8 +4569,9 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " - "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", direction == (*state)->direction ? "fwd" : "rev"); @@ -4421,7 +4614,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; @@ -4485,6 +4678,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, u_int16_t icmpid, *icmpsum; u_int8_t icmptype; int state_icmp = 0; + struct pf_state_cmp key; switch (pd->proto) { #ifdef INET @@ -4522,8 +4716,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. */ - struct pf_state key; - key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { @@ -4715,7 +4907,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; - struct pf_state key; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; @@ -4832,7 +5023,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } case IPPROTO_UDP: { struct udphdr uh; - struct pf_state key; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { @@ -4899,7 +5089,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case IPPROTO_ICMP: { struct icmp iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { @@ -4951,7 +5140,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { @@ -5003,8 +5191,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } #endif /* INET6 */ default: { - struct pf_state key; - key.af = pd2.af; key.proto = pd2.proto; if (direction == PF_IN) { @@ -5067,7 +5253,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; key.af = pd->af; key.proto = pd->proto; @@ -5195,16 +5381,24 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, } int -pf_routable(struct pf_addr *addr, sa_family_t af) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) { struct sockaddr_in *dst; + int ret = 1; + int check_mpath; + extern int ipmultipath; #ifdef INET6 + extern int ip6_multipath; struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + check_mpath = 0; bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -5212,6 +5406,8 @@ pf_routable(struct pf_addr *addr, sa_family_t af) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; + if (ipmultipath) + check_mpath = 1; break; #ifdef INET6 case AF_INET6: @@ -5219,20 +5415,50 @@ pf_routable(struct pf_addr *addr, sa_family_t af) dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; + if (ip6_multipath) + check_mpath = 1; break; #endif /* INET6 */ default: return (0); } + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + rtalloc_noclone((struct route *)&ro, NO_CLONING); if (ro.ro_rt != NULL) { - RTFREE(ro.ro_rt); - return (1); - } + /* No interface given, this is a no-route check */ + if (kif == NULL) + goto out; - return (0); + if (kif->pfik_ifp == NULL) { + ret = 0; + goto out; + } + + /* Perform uRPF check if passed input interface */ + ret = 0; + rn = (struct radix_node *)ro.ro_rt; + do { + rt = (struct rtentry *)rn; + if (rt->rt_ifp->if_type == IFT_CARP) + ifp = rt->rt_ifp->if_carpdev; + else + ifp = rt->rt_ifp; + + if (kif->pfik_ifp == ifp) + ret = 1; + rn = rn_mpath_next(rn); + } while (check_mpath == 1 && rn != NULL && ret == 0); + } else + ret = 0; +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); } int @@ -5281,39 +5507,29 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) #ifdef INET void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; - struct m_tag *mtag; struct route iproute; - struct route *ro; + struct route *ro = NULL; struct sockaddr_in *dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; +#ifdef IPSEC + struct m_tag *mtag; +#endif /* IPSEC */ if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { @@ -5403,33 +5619,33 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #endif /* IPSEC */ /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ } - } else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { + } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ } } if (ntohs(ip->ip_len) <= ifp->if_mtu) { if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && ifp->if_bridge == NULL) { - m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT; + m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; ipstat.ips_outhwcsum++; } else { ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); } /* Update relevant hardware checksum stats for TCP/UDP */ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) tcpstat.tcps_outhwcsum++; - else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) + else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) udpstat.udps_outhwcsum++; error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); goto done; @@ -5443,7 +5659,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ipstat.ips_cantfrag++; if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, - ifp); + ifp->if_mtu); goto done; } else goto bad; @@ -5485,10 +5701,9 @@ bad: #ifdef INET6 void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; - struct m_tag *mtag; struct route_in6 ip6route; struct route_in6 *ro; struct sockaddr_in6 *dst; @@ -5502,22 +5717,10 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { @@ -5543,12 +5746,9 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin6_len = sizeof(*dst); dst->sin6_addr = ip6->ip6_dst; - /* Cheat. */ + /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); + pd->pf_mtag->flags |= PF_TAG_GENERATED; ip6_output(m0, NULL, NULL, 0, NULL, NULL); return; } @@ -5591,7 +5791,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, * If the packet is too large for the outgoing interface, * send back an icmp6 error. */ - if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) + if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { error = nd6_output(ifp, ifp, m0, dst, NULL); @@ -5646,9 +5846,9 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, default: return (1); } - if (m->m_pkthdr.csum & flag_ok) + if (m->m_pkthdr.csum_flags & flag_ok) return (0); - if (m->m_pkthdr.csum & flag_bad) + if (m->m_pkthdr.csum_flags & flag_bad) return (1); if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); @@ -5683,7 +5883,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, return (1); } if (sum) { - m->m_pkthdr.csum |= flag_bad; + m->m_pkthdr.csum_flags |= flag_bad; switch (p) { case IPPROTO_TCP: tcpstat.tcps_rcvbadsum++; @@ -5702,21 +5902,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, } return (1); } - m->m_pkthdr.csum |= flag_ok; - return (0); -} - -static int -pf_add_mbuf_tag(struct mbuf *m, u_int tag) -{ - struct m_tag *mtag; - - if (m_tag_find(m, tag, NULL) != NULL) - return (0); - mtag = m_tag_get(tag, 0, M_NOWAIT); - if (mtag == NULL) - return (1); - m_tag_prepend(m, mtag); + m->m_pkthdr.csum_flags |= flag_ok; return (0); } @@ -5735,14 +5921,22 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct pf_pdesc pd; int off, dirndx, pqid = 0; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); @@ -5756,7 +5950,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, panic("non-M_PKTHDR is passed to pf_test"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -5810,6 +6003,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { + REASON_SET(&reason, PFRES_PROTCKSUM); action = PF_DROP; goto done; } @@ -5846,12 +6040,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); @@ -5880,6 +6076,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, @@ -5922,26 +6119,18 @@ done: ("pf: dropping packet with ip options\n")); } - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - if (pqid || pd.tos == IPTOS_LOWDELAY) - atag->qid = r->pqid; - else - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = AF_INET; - atag->hdr = h; - m_tag_prepend(m, mtag); - } + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET; + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ @@ -5954,41 +6143,48 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && - pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset); + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; - if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; - } - if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; - } - if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; - } } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; @@ -6033,7 +6229,7 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s); + pf_route(m0, r, dir, ifp, s, &pd); return (action); } @@ -6046,7 +6242,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, { struct pfi_kif *kif; u_short action, reason = 0, log = 0; - struct mbuf *m = *m0; + struct mbuf *m = *m0, *n = NULL; struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; @@ -6054,14 +6250,22 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct pf_pdesc pd; int off, terminal = 0, dirndx; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); @@ -6075,7 +6279,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, panic("non-M_PKTHDR is passed to pf_test6"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -6091,6 +6294,18 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, m = *m0; h = mtod(m, struct ip6_hdr *); +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); @@ -6110,9 +6325,67 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + struct ip6_rthdr0 rthdr0; + struct in6_addr finaldst; + struct ip6_hdr *ip6; + + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + if (!pf_pull_hdr(m, off, &rthdr0, + sizeof(rthdr0), NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr0.ip6r0_segleft != 0) { + if (!pf_pull_hdr(m, off + + sizeof(rthdr0) + + rthdr0.ip6r0_len * 8 - + sizeof(finaldst), &finaldst, + sizeof(finaldst), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + + n = m_copym(m, 0, M_COPYALL, M_DONTWAIT); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + n = m_pullup(n, sizeof(struct ip6_hdr)); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + ip6 = mtod(n, struct ip6_hdr *); + ip6->ip6_dst = finaldst; + } + } + /* FALLTHROUGH */ + } case IPPROTO_AH: case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; @@ -6139,6 +6412,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } } while (!terminal); + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + switch (pd.proto) { case IPPROTO_TCP: { @@ -6150,7 +6427,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_TCP, AF_INET6)) { action = PF_DROP; @@ -6185,7 +6462,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_UDP, AF_INET6)) { action = PF_DROP; @@ -6196,6 +6473,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); @@ -6221,7 +6499,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_ICMPV6, AF_INET6)) { action = PF_DROP; @@ -6259,28 +6537,25 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } done: - /* XXX handle IPv6 options, if not allowed. not implemented. */ + if (n != m) { + m_freem(n); + n = NULL; + } - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + /* XXX handle IPv6 options, if not allowed. not implemented. */ + + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - if (pd.tos == IPTOS_LOWDELAY) - atag->qid = r->pqid; - else - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = AF_INET6; - atag->hdr = h; - m_tag_prepend(m, mtag); - } + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET6; + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ @@ -6288,41 +6563,48 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) && - pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset); + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; - if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; - } - if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; - } - if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; - } } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; @@ -6367,7 +6649,7 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s); + pf_route6(m0, r, dir, ifp, s, &pd); return (action); } diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index 26ce0ee731e6..6a15a896317b 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -1,6 +1,8 @@ -/* $OpenBSD: pf_if.c,v 1.23 2004/12/22 17:17:55 dhartmei Exp $ */ +/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ /* + * Copyright 2005 Henning Brauer + * Copyright 2005 Ryan McBride * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2003 Cedric Berger * All rights reserved. @@ -55,40 +57,25 @@ #include #endif /* INET6 */ -#define ACCEPT_FLAGS(oklist) \ - do { \ - if ((flags & ~(oklist)) & \ - PFI_FLAG_ALLMASK) \ - return (EINVAL); \ - } while (0) - -#define senderr(e) do { rv = (e); goto _bad; } while (0) - -struct pfi_kif **pfi_index2kif; -struct pfi_kif *pfi_self; -int pfi_indexlim; -struct pfi_ifhead pfi_ifs; +struct pfi_kif *pfi_all = NULL; struct pfi_statehead pfi_statehead; -int pfi_ifcnt; struct pool pfi_addr_pl; +struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; -void pfi_dynaddr_update(void *); -void pfi_kifaddr_update(void *); +void pfi_kif_update(struct pfi_kif *); +void pfi_dynaddr_update(struct pfi_dynaddr *dyn); void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); +void pfi_kifaddr_update(void *); void pfi_instance_add(struct ifnet *, int, int); void pfi_address_add(struct sockaddr *, int, int); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); -void pfi_copy_group(char *, const char *, int); -void pfi_newgroup(const char *, int); -int pfi_skip_if(const char *, struct pfi_kif *, int); +int pfi_skip_if(const char *, struct pfi_kif *); int pfi_unmask(void *); -void pfi_dohooks(struct pfi_kif *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); @@ -99,7 +86,7 @@ RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); void pfi_initialize(void) { - if (pfi_self != NULL) /* already initialized */ + if (pfi_all != NULL) /* already initialized */ return; TAILQ_INIT(&pfi_statehead); @@ -108,173 +95,235 @@ pfi_initialize(void) pfi_buffer_max = 64; pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); - pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); +} + +struct pfi_kif * +pfi_kif_get(const char *kif_name) +{ + struct pfi_kif *kif; + struct pfi_kif_cmp s; + + bzero(&s, sizeof(s)); + strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); + if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) + return (kif); + + /* create new one */ + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL) + return (NULL); + + bzero(kif, sizeof(*kif)); + strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); + kif->pfik_tzero = time_second; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &pfi_ifs, kif); + return (kif); } void -pfi_attach_clone(struct if_clone *ifc) +pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) { - pfi_initialize(); - pfi_newgroup(ifc->ifc_name, PFI_IFLAG_CLONABLE); + switch (what) { + case PFI_KIF_REF_RULE: + kif->pfik_rules++; + break; + case PFI_KIF_REF_STATE: + if (!kif->pfik_states++) + TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_ref with unknown type"); + } +} + +void +pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + if (kif == NULL) + return; + + switch (what) { + case PFI_KIF_REF_NONE: + break; + case PFI_KIF_REF_RULE: + if (kif->pfik_rules <= 0) { + printf("pfi_kif_unref: rules refcount <= 0\n"); + return; + } + kif->pfik_rules--; + break; + case PFI_KIF_REF_STATE: + if (kif->pfik_states <= 0) { + printf("pfi_kif_unref: state refcount <= 0\n"); + return; + } + if (!--kif->pfik_states) + TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_unref with unknown type"); + } + + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) + return; + + if (kif->pfik_rules || kif->pfik_states) + return; + + RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); + free(kif, PFI_MTYPE); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); } void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + struct pfi_kif *kif; + int s; pfi_initialize(); s = splsoftnet(); pfi_update++; - if (ifp->if_index >= pfi_indexlim) { - /* - * grow pfi_index2kif, similar to ifindex2ifnet code in if.c - */ - size_t m, n, oldlim; - struct pfi_kif **mp, **np; + if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) + panic("pfi_kif_get failed"); - oldlim = pfi_indexlim; - if (pfi_indexlim == 0) - pfi_indexlim = 64; - while (ifp->if_index >= pfi_indexlim) - pfi_indexlim <<= 1; + kif->pfik_ifp = ifp; + ifp->if_pf_kif = (caddr_t)kif; - m = oldlim * sizeof(struct pfi_kif *); - mp = pfi_index2kif; - n = pfi_indexlim * sizeof(struct pfi_kif *); - np = malloc(n, PFI_MTYPE, M_DONTWAIT); - if (np == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate translation table"); - bzero(np, n); - if (mp != NULL) - bcopy(mp, np, m); - pfi_index2kif = np; - if (mp != NULL) - free(mp, PFI_MTYPE); - } + if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1, + pfi_kifaddr_update, kif)) == NULL) + panic("pfi_attach_ifnet: cannot allocate '%s' address hook", + ifp->if_xname); - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - /* add group */ - pfi_copy_group(key.pfik_name, ifp->if_xname, - sizeof(key.pfik_name)); - q = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (q == NULL) - q = pfi_if_create(key.pfik_name, pfi_self, PFI_IFLAG_GROUP); - if (q == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' group", key.pfik_name); + pfi_kif_update(kif); - /* add interface */ - p = pfi_if_create(ifp->if_xname, q, PFI_IFLAG_INSTANCE); - if (p == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' interface", ifp->if_xname); - } else - q = p->pfik_parent; - p->pfik_ifp = ifp; - p->pfik_flags |= PFI_IFLAG_ATTACHED; - p->pfik_ah_cookie = - hook_establish(ifp->if_addrhooks, 1, pfi_kifaddr_update, p); - pfi_index2kif[ifp->if_index] = p; - pfi_dohooks(p); splx(s); } void pfi_detach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + int s; + struct pfi_kif *kif; - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) + return; s = splsoftnet(); pfi_update++; - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - printf("pfi_detach_ifnet: cannot find %s", ifp->if_xname); - splx(s); - return; - } - hook_disestablish(p->pfik_ifp->if_addrhooks, p->pfik_ah_cookie); - q = p->pfik_parent; - p->pfik_ifp = NULL; - p->pfik_flags &= ~PFI_IFLAG_ATTACHED; - pfi_index2kif[ifp->if_index] = NULL; - pfi_dohooks(p); - pfi_maybe_destroy(p); + hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); splx(s); } -struct pfi_kif * -pfi_lookup_create(const char *name) +void +pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *p, *q, key; + struct pfi_kif *kif; int s; + pfi_initialize(); s = splsoftnet(); - p = pfi_lookup_if(name); - if (p == NULL) { - pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); - q = pfi_lookup_if(key.pfik_name); - if (q == NULL) { - pfi_newgroup(key.pfik_name, PFI_IFLAG_DYNAMIC); - q = pfi_lookup_if(key.pfik_name); - } - p = pfi_lookup_if(name); - if (p == NULL && q != NULL) - p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); - } + pfi_update++; + if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) + panic("pfi_kif_get failed"); + + kif->pfik_group = ifg; + ifg->ifg_pf_kif = (caddr_t)kif; + splx(s); - return (p); -} - -struct pfi_kif * -pfi_attach_rule(const char *name) -{ - struct pfi_kif *p; - - p = pfi_lookup_create(name); - if (p != NULL) - p->pfik_rules++; - return (p); } void -pfi_detach_rule(struct pfi_kif *p) +pfi_detach_ifgroup(struct ifg_group *ifg) { - if (p == NULL) + int s; + struct pfi_kif *kif; + + if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) return; - if (p->pfik_rules > 0) - p->pfik_rules--; - else - printf("pfi_detach_rule: reference count at 0\n"); - pfi_maybe_destroy(p); + + s = splsoftnet(); + pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + splx(s); } void -pfi_attach_state(struct pfi_kif *p) +pfi_group_change(const char *group) { - if (!p->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, p, pfik_w_states); + struct pfi_kif *kif; + int s; + + s = splsoftnet(); + pfi_update++; + if ((kif = pfi_kif_get(group)) == NULL) + panic("pfi_kif_get failed"); + + pfi_kif_update(kif); + + splx(s); } -void -pfi_detach_state(struct pfi_kif *p) +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { - if (p == NULL) - return; - if (p->pfik_states <= 0) { - printf("pfi_detach_state: reference count <= 0\n"); - return; + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); } - if (!--p->pfik_states) - TAILQ_REMOVE(&pfi_statehead, p, pfik_w_states); - pfi_maybe_destroy(p); } int @@ -287,15 +336,20 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); - dyn = pool_get(&pfi_addr_pl, PR_NOWAIT); - if (dyn == NULL) + if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) return (1); bzero(dyn, sizeof(*dyn)); s = splsoftnet(); - dyn->pfid_kif = pfi_attach_rule(aw->v.ifname); - if (dyn->pfid_kif == NULL) - senderr(1); + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_get(IFG_ALL); + else + dyn->pfid_kif = pfi_kif_get(aw->v.ifname); + if (dyn->pfid_kif == NULL) { + rv = 1; + goto _bad; + } + pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) @@ -312,24 +366,23 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); - ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR); - if (ruleset == NULL) - senderr(1); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = 1; + goto _bad; + } - dyn->pfid_kt = pfr_attach_table(ruleset, tblname); - if (dyn->pfid_kt == NULL) - senderr(1); + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = 1; + goto _bad; + } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; - dyn->pfid_hook_cookie = hook_establish(dyn->pfid_kif->pfik_ah_head, 1, - pfi_dynaddr_update, dyn); - if (dyn->pfid_hook_cookie == NULL) - senderr(1); + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; - pfi_dynaddr_update(aw->p.dyn); + pfi_kif_update(dyn->pfid_kif); splx(s); return (0); @@ -339,16 +392,32 @@ _bad: if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_detach_rule(dyn->pfid_kif); + pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); pool_put(&pfi_addr_pl, dyn); splx(s); return (rv); } void -pfi_dynaddr_update(void *p) +pfi_kif_update(struct pfi_kif *kif) +{ + struct ifg_list *ifgl; + struct pfi_dynaddr *p; + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); +} + +void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) { - struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; struct pfi_kif *kif; struct pfr_ktable *kt; @@ -357,6 +426,7 @@ pfi_dynaddr_update(void *p) kif = dyn->pfid_kif; kt = dyn->pfid_kt; + if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); @@ -369,28 +439,18 @@ void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; - struct pfi_kif *p; - struct pfr_table t; + struct ifg_member *ifgm; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE) && kif->pfik_ifp == NULL) { - pfr_clr_addrs(&kt->pfrkt_t, NULL, 0); - return; - } pfi_buffer_cnt = 0; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE)) + + if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (strcmp(kif->pfik_name, "self")) { - TAILQ_FOREACH(p, &kif->pfik_grouphead, pfik_instances) - pfi_instance_add(p->pfik_ifp, net, flags); - } else { - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) - if (p->pfik_flags & PFI_IFLAG_INSTANCE) - pfi_instance_add(p->pfik_ifp, net, flags); - } - t = kt->pfrkt_t; - t.pfrt_flags = 0; - if ((e = pfr_set_addrs(&t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0))) + else if (kif->pfik_group != NULL) + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + + if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) printf("pfi_table_update: cannot set %d new addresses " "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); } @@ -434,13 +494,12 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { - if (af == AF_INET) { + if (af == AF_INET) net2 = pfi_unmask(&((struct sockaddr_in *) ia->ifa_netmask)->sin_addr); - } else if (af == AF_INET6) { + else if (af == AF_INET6) net2 = pfi_unmask(&((struct sockaddr_in6 *) ia->ifa_netmask)->sin6_addr); - } } if (af == AF_INET && net2 > 32) net2 = 32; @@ -488,9 +547,9 @@ pfi_address_add(struct sockaddr *sa, int af, int net) p->pfra_net = net; if (af == AF_INET) p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; - if (af == AF_INET6) { + else if (af == AF_INET6) { p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; - if (IN6_IS_ADDR_LINKLOCAL(&p->pfra_ip6addr)) + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) p->pfra_ip6addr.s6_addr16[1] = 0; } /* mask network address bits */ @@ -510,9 +569,8 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw) return; s = splsoftnet(); - hook_disestablish(aw->p.dyn->pfid_kif->pfik_ah_head, - aw->p.dyn->pfid_hook_cookie); - pfi_detach_rule(aw->p.dyn->pfid_kif); + TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); + pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; @@ -533,11 +591,12 @@ pfi_dynaddr_copyout(struct pf_addr_wrap *aw) void pfi_kifaddr_update(void *v) { - int s; + int s; + struct pfi_kif *kif = (struct pfi_kif *)v; s = splsoftnet(); pfi_update++; - pfi_dohooks(v); + pfi_kif_update(kif); splx(s); } @@ -547,107 +606,16 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } -struct pfi_kif * -pfi_if_create(const char *name, struct pfi_kif *q, int flags) -{ - struct pfi_kif *p; - - p = malloc(sizeof(*p), PFI_MTYPE, M_DONTWAIT); - if (p == NULL) - return (NULL); - bzero(p, sizeof(*p)); - p->pfik_ah_head = malloc(sizeof(*p->pfik_ah_head), PFI_MTYPE, - M_DONTWAIT); - if (p->pfik_ah_head == NULL) { - free(p, PFI_MTYPE); - return (NULL); - } - bzero(p->pfik_ah_head, sizeof(*p->pfik_ah_head)); - TAILQ_INIT(p->pfik_ah_head); - TAILQ_INIT(&p->pfik_grouphead); - strlcpy(p->pfik_name, name, sizeof(p->pfik_name)); - RB_INIT(&p->pfik_lan_ext); - RB_INIT(&p->pfik_ext_gwy); - p->pfik_flags = flags; - p->pfik_parent = q; - p->pfik_tzero = time_second; - - RB_INSERT(pfi_ifhead, &pfi_ifs, p); - if (q != NULL) { - q->pfik_addcnt++; - TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt++; - return (p); -} - -int -pfi_maybe_destroy(struct pfi_kif *p) -{ - int i, j, k, s; - struct pfi_kif *q = p->pfik_parent; - - if ((p->pfik_flags & (PFI_IFLAG_ATTACHED | PFI_IFLAG_GROUP)) || - p->pfik_rules > 0 || p->pfik_states > 0) - return (0); - - s = splsoftnet(); - if (q != NULL) { - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - q->pfik_bytes[i][j][k] += - p->pfik_bytes[i][j][k]; - q->pfik_packets[i][j][k] += - p->pfik_packets[i][j][k]; - } - q->pfik_delcnt++; - TAILQ_REMOVE(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt--; - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); - splx(s); - - free(p->pfik_ah_head, PFI_MTYPE); - free(p, PFI_MTYPE); - return (1); -} - -void -pfi_copy_group(char *p, const char *q, int m) -{ - while (m > 1 && *q && !(*q >= '0' && *q <= '9')) { - *p++ = *q++; - m--; - } - if (m > 0) - *p++ = '\0'; -} - -void -pfi_newgroup(const char *name, int flags) -{ - struct pfi_kif *p; - - p = pfi_lookup_if(name); - if (p == NULL) - p = pfi_if_create(name, pfi_self, PFI_IFLAG_GROUP); - if (p == NULL) { - printf("pfi_newgroup: cannot allocate '%s' group", name); - return; - } - p->pfik_flags |= flags; -} - void pfi_fill_oldstatus(struct pf_status *pfs) { - struct pfi_kif *p, key; - int i, j, k, s; + struct pfi_kif *p; + struct pfi_kif_cmp key; + int i, j, k, s; strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); s = splsoftnet(); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) { splx(s); return; @@ -666,40 +634,81 @@ pfi_fill_oldstatus(struct pf_status *pfs) } int -pfi_clr_istats(const char *name, int *nzero, int flags) +pfi_clr_istats(const char *name) { struct pfi_kif *p; - int n = 0, s; - long tzero = time_second; + int s; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + if (pfi_skip_if(name, p)) continue; bzero(p->pfik_packets, sizeof(p->pfik_packets)); bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = tzero; - n++; + p->pfik_tzero = time_second; } splx(s); - if (nzero != NULL) - *nzero = n; + return (0); } +int +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) +{ + struct pfi_kif *p, *nextp; + int s, n = 0; + + s = splsoftnet(); + for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + if (pfi_skip_if(name, p)) + continue; + if (*size > n++) { + if (!p->pfik_tzero) + p->pfik_tzero = time_second; + pfi_kif_ref(p, PFI_KIF_REF_RULE); + if (copyout(p, buf++, sizeof(*buf))) { + pfi_kif_unref(p, PFI_KIF_REF_RULE); + splx(s); + return (EFAULT); + } + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + pfi_kif_unref(p, PFI_KIF_REF_RULE); + } + } + splx(s); + *size = n; + return (0); +} + +int +pfi_skip_if(const char *filter, struct pfi_kif *p) +{ + int n; + + if (filter == NULL || !*filter) + return (0); + if (!strcmp(p->pfik_name, filter)) + return (0); /* exact match */ + n = strlen(filter); + if (n < 1 || n >= IFNAMSIZ) + return (1); /* sanity check */ + if (filter[n-1] >= '0' && filter[n-1] <= '9') + return (1); /* only do exact match in that case */ + if (strncmp(p->pfik_name, filter, n)) + return (1); /* prefix doesn't match */ + return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); +} + int pfi_set_flags(const char *name, int flags) { struct pfi_kif *p; int s; - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) + if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; } @@ -713,12 +722,9 @@ pfi_clear_flags(const char *name, int flags) struct pfi_kif *p; int s; - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) + if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; } @@ -726,64 +732,6 @@ pfi_clear_flags(const char *name, int flags) return (0); } -int -pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) -{ - struct pfi_kif *p; - int s, n = 0; - - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) - continue; - if (*size > n++) { - if (!p->pfik_tzero) - p->pfik_tzero = time_second; - if (copyout(p, buf++, sizeof(*buf))) { - splx(s); - return (EFAULT); - } - } - } - splx(s); - *size = n; - return (0); -} - -struct pfi_kif * -pfi_lookup_if(const char *name) -{ - struct pfi_kif *p, key; - - strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - return (p); -} - -int -pfi_skip_if(const char *filter, struct pfi_kif *p, int f) -{ - int n; - - if ((p->pfik_flags & PFI_IFLAG_GROUP) && !(f & PFI_FLAG_GROUP)) - return (1); - if ((p->pfik_flags & PFI_IFLAG_INSTANCE) && !(f & PFI_FLAG_INSTANCE)) - return (1); - if (filter == NULL || !*filter) - return (0); - if (!strcmp(p->pfik_name, filter)) - return (0); /* exact match */ - n = strlen(filter); - if (n < 1 || n >= IFNAMSIZ) - return (1); /* sanity check */ - if (filter[n-1] >= '0' && filter[n-1] <= '9') - return (1); /* only do exact match in that case */ - if (strncmp(p->pfik_name, filter, n)) - return (1); /* prefix doesn't match */ - return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); -} - /* from pf_print_state.c */ int pfi_unmask(void *addr) @@ -804,44 +752,3 @@ pfi_unmask(void *addr) return (b); } -void -pfi_dohooks(struct pfi_kif *p) -{ - for (; p != NULL; p = p->pfik_parent) - dohooks(p->pfik_ah_head, 0); -} - -int -pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) -{ - switch (af) { -#ifdef INET - case AF_INET: - switch (dyn->pfid_acnt4) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, - &dyn->pfid_mask4, a, AF_INET)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); - } - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - switch (dyn->pfid_acnt6) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, - &dyn->pfid_mask6, a, AF_INET6)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); - } - break; -#endif /* INET6 */ - default: - return (0); - } -} diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index d4cb3c7b3ddd..5694717f609d 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.139 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -48,7 +48,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include @@ -62,12 +66,17 @@ #include #include +#include #include #if NPFSYNC > 0 #include #endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 +#include +#endif /* NPFLOG > 0 */ + #ifdef INET6 #include #include @@ -78,17 +87,11 @@ #endif void pfattach(int); +void pf_thread_create(void *); int pfopen(dev_t, int, int, struct proc *); int pfclose(dev_t, int, int, struct proc *); struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); -int pf_anchor_setup(struct pf_rule *, - const struct pf_ruleset *, const char *); -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); void pf_mv_pool(struct pf_palist *, struct pf_palist *); void pf_empty_pool(struct pf_palist *); @@ -102,11 +105,13 @@ int pf_disable_altq(struct pf_altq *); #endif /* ALTQ */ int pf_begin_rules(u_int32_t *, int, const char *); int pf_rollback_rules(u_int32_t, int, char *); +int pf_setup_pfsync_matching(struct pf_ruleset *); +void pf_hash_rule(MD5_CTX *, struct pf_rule *); +void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); -extern struct timeout pf_expire_to; - struct pf_rule pf_default_rule; +struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER; #ifdef ALTQ static int pf_altq_running; #endif @@ -118,9 +123,9 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); -static void tag2tagname(struct pf_tags *, u_int16_t, char *); -static void tag_unref(struct pf_tags *, u_int16_t); +u_int16_t tagname2tag(struct pf_tags *, char *); +void tag2tagname(struct pf_tags *, u_int16_t, char *); +void tag_unref(struct pf_tags *, u_int16_t); int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); @@ -149,6 +154,10 @@ pfattach(int num) pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); + if (ctob(physmem) <= 100*1024*1024) + pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = + PFR_KENTRY_HIWAT_SMALL; + RB_INIT(&tree_src_tracking); RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); @@ -157,12 +166,13 @@ pfattach(int num) TAILQ_INIT(&pf_pabuf); pf_altqs_active = &pf_altqs[0]; pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_updates); + TAILQ_INIT(&state_list); /* default rule should never be garbage collected */ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; pf_default_rule.action = PF_PASS; pf_default_rule.nr = -1; + pf_default_rule.rtableid = -1; /* initialize default timeouts */ timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; @@ -183,9 +193,8 @@ pfattach(int num) timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; - - timeout_set(&pf_expire_to, pf_purge_timeout, &pf_expire_to); - timeout_add(&pf_expire_to, timeout[PFTM_INTERVAL] * hz); + timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; pf_normalize_init(); bzero(&pf_status, sizeof(pf_status)); @@ -193,6 +202,16 @@ pfattach(int num) /* XXX do our best to avoid a conflict */ pf_status.hostid = arc4random(); + + /* require process context to purge states, so perform in a thread */ + kthread_create_deferred(pf_thread_create, NULL); +} + +void +pf_thread_create(void *v) +{ + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) + panic("pfpurge thread"); } int @@ -255,292 +274,6 @@ pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, return (&rule->rpool); } -int -pf_get_ruleset_number(u_int8_t action) -{ - switch (action) { - case PF_SCRUB: - case PF_NOSCRUB: - return (PF_RULESET_SCRUB); - break; - case PF_PASS: - case PF_DROP: - return (PF_RULESET_FILTER); - break; - case PF_NAT: - case PF_NONAT: - return (PF_RULESET_NAT); - break; - case PF_BINAT: - case PF_NOBINAT: - return (PF_RULESET_BINAT); - break; - case PF_RDR: - case PF_NORDR: - return (PF_RULESET_RDR); - break; - default: - return (PF_RULESET_MAX); - break; - } -} - -void -pf_init_ruleset(struct pf_ruleset *ruleset) -{ - int i; - - memset(ruleset, 0, sizeof(struct pf_ruleset)); - for (i = 0; i < PF_RULESET_MAX; i++) { - TAILQ_INIT(&ruleset->rules[i].queues[0]); - TAILQ_INIT(&ruleset->rules[i].queues[1]); - ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; - ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; - } -} - -struct pf_anchor * -pf_find_anchor(const char *path) -{ - static struct pf_anchor key; - - memset(&key, 0, sizeof(key)); - strlcpy(key.path, path, sizeof(key.path)); - return (RB_FIND(pf_anchor_global, &pf_anchors, &key)); -} - -struct pf_ruleset * -pf_find_ruleset(const char *path) -{ - struct pf_anchor *anchor; - - while (*path == '/') - path++; - if (!*path) - return (&pf_main_ruleset); - anchor = pf_find_anchor(path); - if (anchor == NULL) - return (NULL); - else - return (&anchor->ruleset); -} - -struct pf_ruleset * -pf_find_or_create_ruleset(const char *path) -{ - static char p[MAXPATHLEN]; - char *q, *r; - struct pf_ruleset *ruleset; - struct pf_anchor *anchor, *dup, *parent = NULL; - - while (*path == '/') - path++; - ruleset = pf_find_ruleset(path); - if (ruleset != NULL) - return (ruleset); - strlcpy(p, path, sizeof(p)); - while (parent == NULL && (q = strrchr(p, '/')) != NULL) { - *q = 0; - if ((ruleset = pf_find_ruleset(p)) != NULL) { - parent = ruleset->anchor; - break; - } - } - if (q == NULL) - q = p; - else - q++; - strlcpy(p, path, sizeof(p)); - if (!*q) - return (NULL); - while ((r = strchr(q, '/')) != NULL || *q) { - if (r != NULL) - *r = 0; - if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || - (parent != NULL && strlen(parent->path) >= - MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) - return (NULL); - anchor = (struct pf_anchor *)malloc(sizeof(*anchor), M_TEMP, - M_NOWAIT); - if (anchor == NULL) - return (NULL); - memset(anchor, 0, sizeof(*anchor)); - RB_INIT(&anchor->children); - strlcpy(anchor->name, q, sizeof(anchor->name)); - if (parent != NULL) { - strlcpy(anchor->path, parent->path, - sizeof(anchor->path)); - strlcat(anchor->path, "/", sizeof(anchor->path)); - } - strlcat(anchor->path, anchor->name, sizeof(anchor->path)); - if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != - NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", - anchor->path, anchor->name, dup->path, dup->name); - free(anchor, M_TEMP); - return (NULL); - } - if (parent != NULL) { - anchor->parent = parent; - if ((dup = RB_INSERT(pf_anchor_node, &parent->children, - anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " - "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); - RB_REMOVE(pf_anchor_global, &pf_anchors, - anchor); - free(anchor, M_TEMP); - return (NULL); - } - } - pf_init_ruleset(&anchor->ruleset); - anchor->ruleset.anchor = anchor; - parent = anchor; - if (r != NULL) - q = r + 1; - else - *q = 0; - } - return (&anchor->ruleset); -} - -void -pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) -{ - struct pf_anchor *parent; - int i; - - while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || - !RB_EMPTY(&ruleset->anchor->children) || - ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || - ruleset->topen) - return; - for (i = 0; i < PF_RULESET_MAX; ++i) - if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || - !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || - ruleset->rules[i].inactive.open) - return; - RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); - if ((parent = ruleset->anchor->parent) != NULL) - RB_REMOVE(pf_anchor_node, &parent->children, - ruleset->anchor); - free(ruleset->anchor, M_TEMP); - if (parent == NULL) - return; - ruleset = &parent->ruleset; - } -} - -int -pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, - const char *name) -{ - static char *p, path[MAXPATHLEN]; - struct pf_ruleset *ruleset; - - r->anchor = NULL; - r->anchor_relative = 0; - r->anchor_wildcard = 0; - if (!name[0]) - return (0); - if (name[0] == '/') - strlcpy(path, name + 1, sizeof(path)); - else { - /* relative path */ - r->anchor_relative = 1; - if (s->anchor == NULL || !s->anchor->path[0]) - path[0] = 0; - else - strlcpy(path, s->anchor->path, sizeof(path)); - while (name[0] == '.' && name[1] == '.' && name[2] == '/') { - if (!path[0]) { - printf("pf_anchor_setup: .. beyond root\n"); - return (1); - } - if ((p = strrchr(path, '/')) != NULL) - *p = 0; - else - path[0] = 0; - r->anchor_relative++; - name += 3; - } - if (path[0]) - strlcat(path, "/", sizeof(path)); - strlcat(path, name, sizeof(path)); - } - if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { - r->anchor_wildcard = 1; - *p = 0; - } - ruleset = pf_find_or_create_ruleset(path); - if (ruleset == NULL || ruleset->anchor == NULL) { - printf("pf_anchor_setup: ruleset\n"); - return (1); - } - r->anchor = ruleset->anchor; - r->anchor->refcnt++; - return (0); -} - -int -pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, - struct pfioc_rule *pr) -{ - pr->anchor_call[0] = 0; - if (r->anchor == NULL) - return (0); - if (!r->anchor_relative) { - strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); - strlcat(pr->anchor_call, r->anchor->path, - sizeof(pr->anchor_call)); - } else { - char a[MAXPATHLEN], b[MAXPATHLEN], *p; - int i; - - if (rs->anchor == NULL) - a[0] = 0; - else - strlcpy(a, rs->anchor->path, sizeof(a)); - strlcpy(b, r->anchor->path, sizeof(b)); - for (i = 1; i < r->anchor_relative; ++i) { - if ((p = strrchr(a, '/')) == NULL) - p = a; - *p = 0; - strlcat(pr->anchor_call, "../", - sizeof(pr->anchor_call)); - } - if (strncmp(a, b, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, b); - return (1); - } - if (strlen(b) > strlen(a)) - strlcat(pr->anchor_call, b + (a[0] ? strlen(a) + 1 : 0), - sizeof(pr->anchor_call)); - } - if (r->anchor_wildcard) - strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", - sizeof(pr->anchor_call)); - return (0); -} - -void -pf_anchor_remove(struct pf_rule *r) -{ - if (r->anchor == NULL) - return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) - pf_remove_if_empty_ruleset(&r->anchor->ruleset); - r->anchor = NULL; -} - void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { @@ -560,7 +293,7 @@ pf_empty_pool(struct pf_palist *poola) while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { pfi_dynaddr_remove(&empty_pool_pa->addr); pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_detach_rule(empty_pool_pa->kif); + pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); pool_put(&pf_pooladdr_pl, empty_pool_pa); } @@ -606,13 +339,13 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); } - pfi_detach_rule(rule->kif); + pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); pool_put(&pf_rule_pl, rule); } -static u_int16_t +u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; @@ -657,7 +390,7 @@ tagname2tag(struct pf_tags *head, char *tagname) return (tag->tag); } -static void +void tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) { struct pf_tagname *tag; @@ -669,7 +402,7 @@ tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) } } -static void +void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; @@ -698,7 +431,7 @@ pf_tagname2tag(char *tagname) void pf_tag2tagname(u_int16_t tagid, char *p) { - return (tag2tagname(&pf_tags, tagid, p)); + tag2tagname(&pf_tags, tagid, p); } void @@ -716,7 +449,7 @@ pf_tag_ref(u_int16_t tag) void pf_tag_unref(u_int16_t tag) { - return (tag_unref(&pf_tags, tag)); + tag_unref(&pf_tags, tag); } int @@ -760,13 +493,13 @@ pf_qname2qid(char *qname) void pf_qid2qname(u_int32_t qid, char *p) { - return (tag2tagname(&pf_qids, (u_int16_t)qid, p)); + tag2tagname(&pf_qids, (u_int16_t)qid, p); } void pf_qid_unref(u_int32_t qid) { - return (tag_unref(&pf_qids, (u_int16_t)qid)); + tag_unref(&pf_qids, (u_int16_t)qid); } int @@ -885,7 +618,7 @@ pf_enable_altq(struct pf_altq *altq) if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } @@ -915,7 +648,7 @@ pf_disable_altq(struct pf_altq *altq) if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } @@ -935,8 +668,10 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); @@ -954,19 +689,105 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } rs->rules[rs_num].inactive.open = 0; return (0); } +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + case PF_ADDR_RTLABEL: + PF_MD5_UPD(pfr, addr.v.rtlabelname); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; - struct pf_rule *rule; + struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; - int s; + int s, error; + u_int32_t old_rcount; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); @@ -975,25 +796,87 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + /* Swap rules, keep the old. */ s = splsoftnet(); old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_rm_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); splx(s); return (0); } +int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? */ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + malloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_NOWAIT); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); + return (0); +} + int pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { @@ -1039,7 +922,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: - case DIOCICLRISTATS: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; @@ -1058,7 +940,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: - case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: @@ -1071,6 +952,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETQSTATS: case DIOCGETRULESETS: case DIOCGETRULESET: + case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: @@ -1090,13 +972,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & - PFR_FLAG_DUMMY) + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ + } return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + return (EACCES); + break; default: return (EACCES); } + if (flags & FWRITE) + rw_enter_write(&pf_consistency_lock); + else + rw_enter_read(&pf_consistency_lock); + s = splsoftnet(); switch (cmd) { @@ -1160,6 +1053,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->cuid = p->p_cred->p_ruid; + rule->cpid = p->p_pid; rule->anchor = NULL; rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); @@ -1188,14 +1083,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_attach_rule(rule->ifname); + rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { pool_put(&pf_rule_pl, rule); error = EINVAL; break; } + pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); } + if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { @@ -1218,6 +1117,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; +#if NPFLOG > 0 + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; @@ -1256,9 +1159,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); - rule->evaluations = rule->packets = rule->bytes = 0; + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); + ruleset->rules[rs_num].inactive.rcount++; break; } @@ -1334,6 +1239,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -1389,6 +1300,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = p->p_cred->p_ruid; + newrule->cpid = p->p_pid; TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ newrule->states = 0; @@ -1408,15 +1321,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #endif /* INET6 */ if (newrule->ifname[0]) { - newrule->kif = pfi_attach_rule(newrule->ifname); + newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { pool_put(&pf_rule_pl, newrule); error = EINVAL; break; } + pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); } else newrule->kif = NULL; + if (newrule->rtableid > 0 && + !rtable_exists(newrule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { @@ -1473,7 +1391,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_FASTROUTE)) && - !pcr->anchor[0])) && + !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; @@ -1482,8 +1400,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); - newrule->evaluations = newrule->packets = 0; - newrule->bytes = 0; + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; } pf_empty_pool(&pf_pabuf); @@ -1506,9 +1425,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } - if (pcr->action == PF_CHANGE_REMOVE) + if (pcr->action == PF_CHANGE_REMOVE) { pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); - else { + ruleset->rules[rs_num].active.rcount--; + } else { if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, @@ -1520,6 +1440,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; } nr = 0; @@ -1536,23 +1457,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name)) { - state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ state->sync_flags = PFSTATE_NOSYNC; #endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); - pf_status.states = 0; psk->psk_af = killed; #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); @@ -1561,37 +1483,52 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCKILLSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; + struct pf_state_host *src, *dst; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + + if (state->direction == PF_OUT) { + src = &state->lan; + dst = &state->ext; + } else { + src = &state->ext; + dst = &state->lan; + } if ((!psk->psk_af || state->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == state->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &state->lan.addr, state->af) && + &src->addr, state->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &state->ext.addr, state->af) && + &dst->addr, state->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - state->lan.port)) && + src->port)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - state->ext.port)) && + dst->port)) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name))) { - state->timeout = PFTM_PURGE; +#if NPFSYNC > 0 + /* send immediate delete of state */ + pfsync_delete_state(state); + state->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); psk->psk_af = killed; break; } @@ -1611,7 +1548,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENOMEM; break; } - kif = pfi_lookup_create(ps->state.u.ifname); + kif = pfi_kif_get(ps->state.u.ifname); if (kif == NULL) { pool_put(&pf_state_pl, state); error = ENOENT; @@ -1629,7 +1566,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) state->bytes[0] = state->bytes[1] = 0; if (pf_insert_state(kif, state)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); pool_put(&pf_state_pl, state); error = ENOMEM; } @@ -1640,6 +1577,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *state; u_int32_t nr; + int secs; nr = 0; RB_FOREACH(state, pf_state_tree_id, &tree_id) { @@ -1651,15 +1589,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - bcopy(state, &ps->state, sizeof(struct pf_state)); + secs = time_second; + bcopy(state, &ps->state, sizeof(ps->state)); + strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, + sizeof(ps->state.u.ifname)); ps->state.rule.nr = state->rule.ptr->nr; ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; ps->state.anchor.nr = (state->anchor.ptr == NULL) ? -1 : state->anchor.ptr->nr; + ps->state.creation = secs - ps->state.creation; ps->state.expire = pf_state_expires(state); - if (ps->state.expire > time_second) - ps->state.expire -= time_second; + if (ps->state.expire > secs) + ps->state.expire -= secs; else ps->state.expire = 0; break; @@ -1668,48 +1610,57 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, pstore; - struct pfi_kif *kif; + struct pf_state *p, *pstore; u_int32_t nr = 0; int space = ps->ps_len; if (space == 0) { - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - nr += kif->pfik_states; + nr = pf_status.states; ps->ps_len = sizeof(struct pf_state) * nr; break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = ps->ps_states; - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - RB_FOREACH(state, pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy) { + + state = TAILQ_FIRST(&state_list); + while (state) { + if (state->timeout != PFTM_UNLINKED) { int secs = time_second; if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - bcopy(state, &pstore, sizeof(pstore)); - strlcpy(pstore.u.ifname, kif->pfik_name, - sizeof(pstore.u.ifname)); - pstore.rule.nr = state->rule.ptr->nr; - pstore.nat_rule.nr = (state->nat_rule.ptr == + bcopy(state, pstore, sizeof(*pstore)); + strlcpy(pstore->u.ifname, + state->u.s.kif->pfik_name, + sizeof(pstore->u.ifname)); + pstore->rule.nr = state->rule.ptr->nr; + pstore->nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; - pstore.anchor.nr = (state->anchor.ptr == + pstore->anchor.nr = (state->anchor.ptr == NULL) ? -1 : state->anchor.ptr->nr; - pstore.creation = secs - pstore.creation; - pstore.expire = pf_state_expires(state); - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->creation = secs - pstore->creation; + pstore->expire = pf_state_expires(state); + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + pstore->expire = 0; + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } + state = TAILQ_NEXT(state, u.s.entry_list); + } + ps->ps_len = sizeof(struct pf_state) * nr; + + free(pstore, M_TEMP); break; } @@ -1739,16 +1690,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, sizeof(pf_status.scounters)); + pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname, NULL, - PFI_FLAG_INSTANCE); + pfi_clr_istats(pf_status.ifname); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state *state; - struct pf_state key; + struct pf_state_cmp key; int m = 0, direction = pnl->direction; key.af = pnl->af; @@ -1757,7 +1708,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || - !pnl->dport || !pnl->sport) + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) error = EINVAL; else { /* @@ -1813,7 +1766,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) goto fail; } old = pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); pt->seconds = old; break; } @@ -1868,13 +1825,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; TAILQ_FOREACH(rule, - ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) - rule->evaluations = rule->packets = - rule->bytes = 0; + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -2067,16 +2027,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_attach_rule(pa->ifname); + pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; } + pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); } if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); - pfi_detach_rule(pa->kif); + pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; @@ -2176,18 +2137,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #endif /* INET6 */ if (newpa->ifname[0]) { - newpa->kif = pfi_attach_rule(newpa->ifname); + newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } + pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); } else newpa->kif = NULL; if (pfi_dynaddr_setup(&newpa->addr, pca->af) || pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); - pfi_detach_rule(newpa->kif); + pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; @@ -2216,7 +2178,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) TAILQ_REMOVE(&pool->list, oldpa, entries); pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); - pfi_detach_rule(oldpa->kif); + pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, oldpa); } else { if (oldpa == NULL) @@ -2426,7 +2388,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); + PFR_FLAG_USERIOCTL, 0); break; } @@ -2506,150 +2468,203 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCXBEGIN: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_begin_altq(&ioe.ticket))) + if ((error = pf_begin_altq(&ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_begin(&table, - &ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_begin(table, + &ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; default: - if ((error = pf_begin_rules(&ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; } - if (copyout(&ioe, io->array+i, sizeof(io->array[i]))) { + if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXROLLBACK: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_rollback_altq(ioe.ticket))) + if ((error = pf_rollback_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_rollback(&table, - ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_rollback(table, + ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_rollback_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXCOMMIT: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - struct pf_ruleset *rs; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + struct pf_ruleset *rs; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); /* first makes sure everything will succeed */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if (!altqs_inactive_open || ioe.ticket != + if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - rs = pf_find_ruleset(ioe.anchor); - if (rs == NULL || !rs->topen || ioe.ticket != + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; default: - if (ioe.rs_num < 0 || ioe.rs_num >= + if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - rs = pf_find_ruleset(ioe.anchor); + rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || - !rs->rules[ioe.rs_num].inactive.open || - rs->rules[ioe.rs_num].inactive.ticket != - ioe.ticket) { + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } @@ -2658,39 +2673,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } /* now do the commit - no errors should happen here */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if ((error = pf_commit_altq(ioe.ticket))) + if ((error = pf_commit_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_commit(&table, ioe.ticket, - NULL, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_commit(table, ioe->ticket, + NULL, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_commit_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; - struct pf_src_node *n; - struct pf_src_node *p, pstore; + struct pf_src_node *n, *p, *pstore; u_int32_t nr = 0; int space = psn->psn_len; @@ -2701,6 +2728,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = psn->psn_src_nodes; RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { int secs = time_second, diff; @@ -2708,31 +2737,35 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; - bcopy(n, &pstore, sizeof(pstore)); + bcopy(n, pstore, sizeof(*pstore)); if (n->rule.ptr != NULL) - pstore.rule.nr = n->rule.ptr->nr; - pstore.creation = secs - pstore.creation; - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->rule.nr = n->rule.ptr->nr; + pstore->creation = secs - pstore->creation; + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; + pstore->expire = 0; /* adjust the connection rate estimate */ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) - pstore.conn_rate.count = 0; + pstore->conn_rate.count = 0; else - pstore.conn_rate.count -= + pstore->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } psn->psn_len = sizeof(struct pf_src_node) * nr; + + free(pstore, M_TEMP); break; } @@ -2748,11 +2781,50 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) n->expire = 1; n->states = 0; } - pf_purge_expired_src_nodes(); + pf_purge_expired_src_nodes(1); pf_status.src_nodes = 0; break; } + case DIOCKILLSRCNODES: { + struct pf_src_node *sn; + struct pf_state *s; + struct pfioc_src_node_kill *psnk = \ + (struct pfioc_src_node_kill *) addr; + int killed = 0; + + RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { + if (PF_MATCHA(psnk->psnk_src.neg, \ + &psnk->psnk_src.addr.v.a.addr, \ + &psnk->psnk_src.addr.v.a.mask, \ + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, \ + &psnk->psnk_dst.addr.v.a.addr, \ + &psnk->psnk_dst.addr.v.a.mask, \ + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) { + RB_FOREACH(s, pf_state_tree_id, + &tree_id) { + if (s->src_node == sn) + s->src_node = NULL; + if (s->nat_src_node == sn) + s->nat_src_node = NULL; + } + sn->states = 0; + } + sn->expire = 1; + killed++; + } + } + + if (killed > 0) + pf_purge_expired_src_nodes(1); + + psnk->psnk_af = killed; + break; + } + case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; @@ -2770,20 +2842,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; - if (io->pfiio_esize != sizeof(struct pfi_if)) { + if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size, io->pfiio_flags); - break; - } - - case DIOCICLRISTATS: { - struct pfioc_iface *io = (struct pfioc_iface *)addr; - - error = pfi_clr_istats(io->pfiio_name, &io->pfiio_nzero, - io->pfiio_flags); + &io->pfiio_size); break; } @@ -2807,5 +2871,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } fail: splx(s); + if (flags & FWRITE) + rw_exit_write(&pf_consistency_lock); + else + rw_exit_read(&pf_consistency_lock); return (error); } diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 3c3dbc062ca4..df339ae6f691 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ /* * Copyright 2001 Niels Provos @@ -412,7 +412,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, break; } - /* This fragment is completely overlapped, loose it */ + /* This fragment is completely overlapped, lose it */ next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); @@ -704,7 +704,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, } else { hosed++; } - } else { + } else if (frp == NULL) { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", h->ip_id, -aftercut, off, max, fra->fr_off, @@ -831,8 +831,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -841,19 +840,23 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (hlen < (int)sizeof(struct ip)) @@ -863,8 +866,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto drop; /* Clear IP_DF if the rule uses the no-df option */ - if (r->rule_flag & PFRULE_NODF) + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* We will need other tests here */ if (!fragoff && !mff) @@ -922,6 +929,18 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, if (m == NULL) return (PF_DROP); + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -930,15 +949,13 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; - if (dir == PF_OUT) { - if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != - NULL) { - /* Already passed the fragment cache in the - * input direction. If we continued, it would - * appear to be a dup and would be dropped. - */ - goto fragment_pass; - } + if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { + /* + * Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. + */ + goto fragment_pass; } frag = pf_find_fragment(h, &pf_cache_tree); @@ -959,14 +976,21 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto drop; } - if (dir == PF_IN) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); - if (mtag == NULL) + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; goto no_mem; - m_tag_prepend(m, mtag); + } } +#endif + if (dir == PF_IN) + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; goto fragment_pass; @@ -974,11 +998,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_fragment: /* At this point, only IP_DF is allowed in ip_off */ - h->ip_off &= htons(IP_DF); + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if (r->rule_flag & PFRULE_RANDOMID) { u_int16_t ip_id = h->ip_id; @@ -993,8 +1026,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, fragment_pass: /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); @@ -1002,13 +1039,13 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); bad: @@ -1020,7 +1057,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1048,8 +1085,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1060,19 +1096,23 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) @@ -1184,19 +1224,19 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } #endif /* INET6 */ @@ -1215,8 +1255,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1224,12 +1263,14 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -1246,8 +1287,10 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) pd->flags |= PFDESC_TCP_NORM; @@ -1309,7 +1352,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1743,7 +1786,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * timestamps. And require all data packets to contain a timestamp * if the first does. PAWS implicitly requires that all data packets be * timestamped. But I think there are middle-man devices that hijack - * TCP streams immedietly after the 3whs and don't timestamp their + * TCP streams immediately after the 3whs and don't timestamp their * packets (seen in a WWW accelerator or cache). */ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index b2adcf29667d..15dca8e1fbdd 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_osfp.c,v 1.10 2004/04/09 19:30:41 frantzen Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ /* * Copyright (c) 2003 Mike Frantzen @@ -32,9 +32,10 @@ #include #include -#ifdef INET6 #include -#endif /* INET6 */ +#ifdef _KERNEL +#include +#endif #ifdef _KERNEL @@ -51,6 +52,7 @@ typedef struct pool pool_t; # include # include # include +# include # define pool_t int # define pool_get(pool, flags) malloc(*(pool)) # define pool_put(pool, item) free(item) @@ -87,38 +89,96 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, const struct tcphdr *tcp) { struct ip *ip; + struct ip6_hdr *ip6; char hdr[60]; - /* XXX don't have a fingerprint database for IPv6 :-( */ - if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) - < sizeof(*tcp)) + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) return (NULL); - ip = mtod(m, struct ip *); - if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) - return (NULL); + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); - return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } #endif /* _KERNEL */ struct pf_osfp_enlist * -pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; +#ifdef _KERNEL + char srcname[128]; +#else + char srcname[NI_MAXHOST]; +#endif - if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & - htons(IP_OFFMASK))) + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } memset(&fp, 0, sizeof(fp)); - fp.fp_psize = ntohs(ip->ip_len); - fp.fp_ttl = ip->ip_ttl; - if (ip->ip_off & htons(IP_DF)) + if (ip) { +#ifndef _KERNEL + struct sockaddr_in sin; +#endif + + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; +#ifdef _KERNEL + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); +#else + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr = ip->ip_src; + (void)getnameinfo((struct sockaddr *)&sin, + sizeof(struct sockaddr_in), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#ifdef INET6 + else if (ip6) { +#ifndef _KERNEL + struct sockaddr_in6 sin6; +#endif + + /* jumbo payload? */ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; +#ifdef _KERNEL + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); +#else + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = ip6->ip6_src; + (void)getnameinfo((struct sockaddr *)&sin6, + sizeof(struct sockaddr_in6), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#endif + else + return (NULL); fp.fp_wsize = ntohs(tcp->th_win); @@ -181,7 +241,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " "(TS=%s,M=%s%d,W=%s%d)\n", - inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", diff --git a/sys/contrib/pf/net/pf_ruleset.c b/sys/contrib/pf/net/pf_ruleset.c new file mode 100644 index 000000000000..0db7b1aea6dc --- /dev/null +++ b/sys/contrib/pf/net/pf_ruleset.c @@ -0,0 +1,415 @@ +/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#ifdef _KERNEL +# include +#endif /* _KERNEL */ +#include + +#include +#include +#include +#include + +#include +#include + +#ifdef INET6 +#include +#endif /* INET6 */ + + +#ifdef _KERNEL +# define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK) +#define rs_free(x) free(x, M_TEMP) + +#else +/* Userland equivalents so we can lend code to pfctl et al. */ + +# include +# include +# include +# include +# include +# define rs_malloc(x) malloc(x) +# define rs_free(x) free(x) + +# ifdef PFDEBUG +# include +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? -1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + memset(key, 0, sizeof(*key)); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + bzero(p, MAXPATHLEN); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + memset(anchor, 0, sizeof(*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + bzero(path, MAXPATHLEN); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + bzero(a, MAXPATHLEN); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index 621809a3b0e4..a79ed372a441 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.62 2004/12/07 18:02:04 mcbride Exp $ */ +/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -404,7 +404,8 @@ _bad: int pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, - int *size2, int *nadd, int *ndel, int *nchange, int flags) + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) { struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq addq, delq, changeq; @@ -414,7 +415,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); - if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2047,7 +2049,7 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); if (ac != NULL) - strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { kt = pfr_create_ktable(&tbl, time_second, 1); diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index d27d01d2323b..d650f7997045 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.213 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -44,10 +45,18 @@ #include struct ip; +struct ip6_hdr; #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, @@ -60,6 +69,8 @@ enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; + /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before * PFTM_MAX, special cases afterwards. See pf_state_expires(). @@ -71,7 +82,8 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, + PFTM_UNTIL_PACKET }; /* PFTM default values */ #define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ @@ -94,17 +106,22 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, #define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + struct pf_addr { union { struct in_addr v4; @@ -152,18 +169,19 @@ struct pf_addr_wrap { #ifdef _KERNEL struct pfi_dynaddr { - struct pf_addr pfid_addr4; - struct pf_addr pfid_mask4; - struct pf_addr pfid_addr6; - struct pf_addr pfid_mask6; - struct pfr_ktable *pfid_kt; - struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; - int pfid_net; /* optional mask, or 128 */ - int pfid_acnt4; /* address count, IPv4 */ - int pfid_acnt6; /* address count, IPv6 */ - sa_family_t pfid_af; /* rule address family */ - u_int8_t pfid_iflags; /* PFI_AFLAG_* */ + TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; /* @@ -299,23 +317,26 @@ struct pfi_dynaddr { #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, neg) \ - ( \ - (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af))) || \ - ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw))) || \ - ((aw)->type == PF_ADDR_TABLE && \ - !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ - ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ - ((aw)->type == PF_ADDR_ADDRMASK && \ - !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ - &(aw)->v.a.mask, (x), (af)))) != \ - (neg) \ +#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL)) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) \ ) + struct pf_rule_uid { uid_t uid[2]; u_int8_t op; @@ -433,6 +454,7 @@ struct pf_os_fingerprint { #define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ #define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ #define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ @@ -488,11 +510,11 @@ struct pf_rule { union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; -#define PF_QNAME_SIZE 16 +#define PF_QNAME_SIZE 64 char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; -#define PF_TAG_NAME_SIZE 16 +#define PF_TAG_NAME_SIZE 64 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; @@ -502,8 +524,8 @@ struct pf_rule { struct pf_pool rpool; u_int64_t evaluations; - u_int64_t packets; - u_int64_t bytes; + u_int64_t packets[2]; + u_int64_t bytes[2]; struct pfi_kif *kif; struct pf_anchor *anchor; @@ -511,6 +533,7 @@ struct pf_rule { pf_osfp_t os_fingerprint; + int rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t states; u_int32_t max_states; @@ -527,6 +550,8 @@ struct pf_rule { u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; + uid_t cuid; + pid_t cpid; u_int16_t return_icmp; u_int16_t return_icmp6; @@ -541,6 +566,7 @@ struct pf_rule { u_int8_t action; u_int8_t direction; u_int8_t log; + u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; @@ -588,9 +614,10 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ -#define PFRULE_GRBOUND 0x00020000 /* group-bound */ #define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ struct pf_threshold { @@ -608,8 +635,8 @@ struct pf_src_node { struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; - u_int32_t bytes; - u_int32_t packets; + u_int64_t bytes[2]; + u_int64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; @@ -651,26 +678,53 @@ struct pf_state_peer { u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int16_t mss; /* Maximum segment size option */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int8_t pad[3]; }; TAILQ_HEAD(pf_state_queue, pf_state); +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; +}; + struct pf_state { u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 union { struct { RB_ENTRY(pf_state) entry_lan_ext; RB_ENTRY(pf_state) entry_ext_gwy; RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_updates; + TAILQ_ENTRY(pf_state) entry_list; struct pfi_kif *kif; } s; char ifname[IFNAMSIZ]; } u; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; @@ -680,24 +734,12 @@ struct pf_state { struct pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; - u_int32_t packets[2]; - u_int32_t bytes[2]; - u_int32_t creatorid; u_int16_t tag; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t allow_opts; - u_int8_t timeout; - u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_STALE 0x04 - u_int8_t pad; }; TAILQ_HEAD(pf_rulequeue, pf_rule); @@ -709,6 +751,8 @@ struct pf_ruleset { struct pf_rulequeue queues[2]; struct { struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; u_int32_t ticket; int open; } active, inactive; @@ -730,6 +774,7 @@ struct pf_anchor { char path[MAXPATHLEN]; struct pf_ruleset ruleset; int refcnt; /* anchor rules */ + int match; }; RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); @@ -846,55 +891,47 @@ RB_HEAD(pf_state_tree_ext_gwy, pf_state); RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; -}; - -TAILQ_HEAD(pfi_grouphead, pfi_kif); TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_name[IFNAMSIZ]; +}; + struct pfi_kif { - struct pfi_if pfik_if; + char pfik_name[IFNAMSIZ]; RB_ENTRY(pfi_kif) pfik_tree; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; struct pf_state_tree_lan_ext pfik_lan_ext; struct pf_state_tree_ext_gwy pfik_ext_gwy; - struct pfi_grouphead pfik_grouphead; - TAILQ_ENTRY(pfi_kif) pfik_instances; TAILQ_ENTRY(pfi_kif) pfik_w_states; - struct hook_desc_head *pfik_ah_head; void *pfik_ah_cookie; - struct pfi_kif *pfik_parent; struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; int pfik_states; int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; +}; + +enum pfi_kif_refs { + PFI_KIF_REF_NONE, + PFI_KIF_REF_STATE, + PFI_KIF_REF_RULE }; -#define pfik_name pfik_if.pfif_name -#define pfik_packets pfik_if.pfif_packets -#define pfik_bytes pfik_if.pfif_bytes -#define pfik_tzero pfik_if.pfif_tzero -#define pfik_flags pfik_if.pfif_flags -#define pfik_addcnt pfik_if.pfif_addcnt -#define pfik_delcnt pfik_if.pfif_delcnt -#define pfik_states pfik_if.pfif_states -#define pfik_rules pfik_if.pfif_rules -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + pid_t pid; + } lookup; u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; @@ -912,6 +949,7 @@ struct pf_pdesc { struct pf_addr *dst; struct ether_header *eh; + struct pf_mtag *pf_mtag; u_int16_t *ip_sum; u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in @@ -1052,6 +1090,7 @@ struct pf_status { u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; }; struct cbq_opts { @@ -1114,6 +1153,20 @@ struct pf_altq { u_int32_t qid; /* return value */ }; +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + u_int rtableid; /* alternate routing table id */ + u_int32_t qid; /* queue id */ + u_int16_t tag; /* tag id */ + u_int8_t flags; + u_int8_t routed; + sa_family_t af; /* for ECN */ +}; + struct pf_tag { u_int16_t tag; /* tag id */ }; @@ -1130,6 +1183,10 @@ struct pf_tagname { #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ #define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ +#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ + /* * ioctl parameter structures */ @@ -1175,6 +1232,13 @@ struct pfioc_state { struct pf_state state; }; +struct pfioc_src_node_kill { + /* XXX returns the number of src nodes killed in psnk_af */ + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; +}; + struct pfioc_state_kill { /* XXX returns the number of states killed in psk_af */ sa_family_t psk_af; @@ -1282,11 +1346,6 @@ struct pfioc_table { #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 - struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; @@ -1365,9 +1424,9 @@ struct pfioc_iface { #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) -#define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #ifdef _KERNEL RB_HEAD(pf_src_tree, pf_src_node); @@ -1378,16 +1437,13 @@ RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_updates; +extern struct pf_state_queue state_list; -extern struct pf_anchor_global pf_anchors; -extern struct pf_ruleset pf_main_ruleset; TAILQ_HEAD(pf_poolqueue, pf_pool); extern struct pf_poolqueue pf_pools[2]; TAILQ_HEAD(pf_altqqueue, pf_altq); extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; -extern struct pfi_kif **pfi_index2kif; extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; @@ -1405,26 +1461,22 @@ extern void pf_calc_skip_steps(struct pf_rulequeue *); extern struct pool pf_src_tree_pl, pf_rule_pl; extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; -extern void pf_purge_timeout(void *); -extern void pf_purge_expired_src_nodes(void); -extern void pf_purge_expired_states(void); -extern void pf_purge_expired_state(struct pf_state *); +extern void pf_purge_thread(void *); +extern void pf_purge_expired_src_nodes(int); +extern void pf_purge_expired_states(u_int32_t); +extern void pf_unlink_state(struct pf_state *); +extern void pf_free_state(struct pf_state *); extern int pf_insert_state(struct pfi_kif *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); -extern struct pf_state *pf_find_state_byid(struct pf_state *); -extern struct pf_state *pf_find_state_all(struct pf_state *key, +extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); -extern struct pf_anchor *pf_find_anchor(const char *); -extern struct pf_ruleset *pf_find_ruleset(const char *); -extern struct pf_ruleset *pf_find_or_create_ruleset(const char *); -extern void pf_remove_if_empty_ruleset( - struct pf_ruleset *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); @@ -1450,7 +1502,8 @@ void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, + struct pf_pdesc *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); @@ -1474,8 +1527,9 @@ int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_socket_lookup(int, struct pf_pdesc *); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1500,7 +1554,7 @@ int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, - int *, int *, int *, int); + int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, @@ -1513,41 +1567,44 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +extern struct pfi_statehead pfi_statehead; +extern struct pfi_kif *pfi_all; + void pfi_initialize(void); -void pfi_attach_clone(struct if_clone *); +struct pfi_kif *pfi_kif_get(const char *); +void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); void pfi_attach_ifnet(struct ifnet *); void pfi_detach_ifnet(struct ifnet *); -struct pfi_kif *pfi_lookup_create(const char *); -struct pfi_kif *pfi_lookup_if(const char *); -int pfi_maybe_destroy(struct pfi_kif *); -struct pfi_kif *pfi_attach_rule(const char *); -void pfi_detach_rule(struct pfi_kif *); -void pfi_attach_state(struct pfi_kif *); -void pfi_detach_state(struct pfi_kif *); -int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_copyout(struct pf_addr_wrap *); -void pfi_dynaddr_remove(struct pf_addr_wrap *); -void pfi_fill_oldstatus(struct pf_status *); -int pfi_clr_istats(const char *, int *, int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); -int pfi_set_flags(const char *, int); -int pfi_clear_flags(const char *, int); +void pfi_attach_ifgroup(struct ifg_group *); +void pfi_detach_ifgroup(struct ifg_group *); +void pfi_group_change(const char *); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); +int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); +void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); +void pfi_fill_oldstatus(struct pf_status *); +int pfi_clr_istats(const char *); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); +int pfi_set_flags(const char *, int); +int pfi_clear_flags(const char *, int); -extern struct pfi_statehead pfi_statehead; - -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_ref(u_int16_t); -void pf_tag_unref(u_int16_t); -int pf_tag_packet(struct mbuf *, struct pf_tag *, int); -u_int32_t pf_qname2qid(char *); -void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_ref(u_int16_t); +void pf_tag_unref(u_int16_t); +int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +u_int32_t pf_qname2qid(char *); +void pf_qid2qname(u_int32_t, char *); +void pf_qid_unref(u_int32_t); +struct pf_mtag *pf_find_mtag(struct mbuf *); +struct pf_mtag *pf_get_mtag(struct mbuf *); extern struct pf_status pf_status; extern struct pool pf_frent_pl, pf_frag_pl; +extern struct rwlock pf_consistency_lock; struct pf_pool_limit { void *pp; @@ -1557,6 +1614,31 @@ extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; #endif /* _KERNEL */ +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +void pf_remove_if_empty_ruleset(struct pf_ruleset *); +struct pf_anchor *pf_find_anchor(const char *); +struct pf_ruleset *pf_find_ruleset(const char *); +struct pf_ruleset *pf_find_or_create_ruleset(const char *); +void pf_rs_initialize(void); + +#ifdef _KERNEL +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +#endif /* _KERNEL */ + /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL @@ -1565,7 +1647,8 @@ struct pf_osfp_enlist * const struct tcphdr *); #endif /* _KERNEL */ struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct tcphdr *); + pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, + const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); void pf_osfp_initialize(void);