mmacy 7aeac9ef18 ifnet: Replace if_addr_lock rwlock with epoch + mutex
Run on LLNW canaries and tested by pho@

gallatin:
Using a 14-core, 28-HTT single socket E5-2697 v3 with a 40GbE MLX5
based ConnectX 4-LX NIC, I see an almost 12% improvement in received
packet rate, and a larger improvement in bytes delivered all the way
to userspace.

When the host receiving 64 streams of netperf -H $DUT -t UDP_STREAM -- -m 1,
I see, using nstat -I mce0 1 before the patch:

InMpps OMpps  InGbs  OGbs err TCP Est %CPU syscalls csw     irq GBfree
4.98   0.00   4.42   0.00 4235592     33   83.80 4720653 2149771   1235 247.32
4.73   0.00   4.20   0.00 4025260     33   82.99 4724900 2139833   1204 247.32
4.72   0.00   4.20   0.00 4035252     33   82.14 4719162 2132023   1264 247.32
4.71   0.00   4.21   0.00 4073206     33   83.68 4744973 2123317   1347 247.32
4.72   0.00   4.21   0.00 4061118     33   80.82 4713615 2188091   1490 247.32
4.72   0.00   4.21   0.00 4051675     33   85.29 4727399 2109011   1205 247.32
4.73   0.00   4.21   0.00 4039056     33   84.65 4724735 2102603   1053 247.32

After the patch

InMpps OMpps  InGbs  OGbs err TCP Est %CPU syscalls csw     irq GBfree
5.43   0.00   4.20   0.00 3313143     33   84.96 5434214 1900162   2656 245.51
5.43   0.00   4.20   0.00 3308527     33   85.24 5439695 1809382   2521 245.51
5.42   0.00   4.19   0.00 3316778     33   87.54 5416028 1805835   2256 245.51
5.42   0.00   4.19   0.00 3317673     33   90.44 5426044 1763056   2332 245.51
5.42   0.00   4.19   0.00 3314839     33   88.11 5435732 1792218   2499 245.52
5.44   0.00   4.19   0.00 3293228     33   91.84 5426301 1668597   2121 245.52

Similarly, netperf reports 230Mb/s before the patch, and 270Mb/s after the patch

Reviewed by:	gallatin
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D15366
2018-05-18 20:13:34 +00:00

907 lines
23 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2001 Daniel Hartmeier
* Copyright (c) 2003 Cedric Berger
* Copyright (c) 2005 Henning Brauer <henning@openbsd.org>
* Copyright (c) 2005 Ryan McBride <mcbride@openbsd.org>
* Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/route.h>
VNET_DEFINE(struct pfi_kif *, pfi_all);
static VNET_DEFINE(long, pfi_update);
#define V_pfi_update VNET(pfi_update)
#define PFI_BUFFER_MAX 0x10000
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
static VNET_DEFINE(int, pfi_buffer_cnt);
static VNET_DEFINE(int, pfi_buffer_max);
#define V_pfi_buffer VNET(pfi_buffer)
#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt)
#define V_pfi_buffer_max VNET(pfi_buffer_max)
eventhandler_tag pfi_attach_cookie;
eventhandler_tag pfi_detach_cookie;
eventhandler_tag pfi_attach_group_cookie;
eventhandler_tag pfi_change_group_cookie;
eventhandler_tag pfi_detach_group_cookie;
eventhandler_tag pfi_ifaddr_event_cookie;
static void pfi_attach_ifnet(struct ifnet *);
static void pfi_attach_ifgroup(struct ifg_group *);
static void pfi_kif_update(struct pfi_kif *);
static void pfi_dynaddr_update(struct pfi_dynaddr *dyn);
static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int,
int);
static void pfi_instance_add(struct ifnet *, int, int);
static void pfi_address_add(struct sockaddr *, int, int);
static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *);
static int pfi_skip_if(const char *, struct pfi_kif *);
static int pfi_unmask(void *);
static void pfi_attach_ifnet_event(void * __unused, struct ifnet *);
static void pfi_detach_ifnet_event(void * __unused, struct ifnet *);
static void pfi_attach_group_event(void * __unused, struct ifg_group *);
static void pfi_change_group_event(void * __unused, char *);
static void pfi_detach_group_event(void * __unused, struct ifg_group *);
static void pfi_ifaddr_event(void * __unused, struct ifnet *);
RB_HEAD(pfi_ifhead, pfi_kif);
static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
static VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
#define V_pfi_ifs VNET(pfi_ifs)
#define PFI_BUFFER_MAX 0x10000
MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database");
LIST_HEAD(pfi_list, pfi_kif);
static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs)
static struct mtx pfi_unlnkdkifs_mtx;
MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
MTX_DEF);
void
pfi_initialize_vnet(void)
{
struct ifg_group *ifg;
struct ifnet *ifp;
struct pfi_kif *kif;
V_pfi_buffer_max = 64;
V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
PFI_MTYPE, M_WAITOK);
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
PF_RULES_WLOCK();
V_pfi_all = pfi_kif_attach(kif, IFG_ALL);
PF_RULES_WUNLOCK();
IFNET_RLOCK();
TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
pfi_attach_ifgroup(ifg);
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
pfi_attach_ifnet(ifp);
IFNET_RUNLOCK();
}
void
pfi_initialize(void)
{
pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
pfi_attach_group_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
pfi_change_group_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
pfi_detach_group_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
}
void
pfi_cleanup_vnet(void)
{
struct pfi_kif *kif;
PF_RULES_WASSERT();
V_pfi_all = NULL;
while ((kif = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
if (kif->pfik_group)
kif->pfik_group->ifg_pf_kif = NULL;
if (kif->pfik_ifp)
kif->pfik_ifp->if_pf_kif = NULL;
free(kif, PFI_MTYPE);
}
mtx_lock(&pfi_unlnkdkifs_mtx);
while ((kif = LIST_FIRST(&V_pfi_unlinked_kifs))) {
LIST_REMOVE(kif, pfik_list);
free(kif, PFI_MTYPE);
}
mtx_unlock(&pfi_unlnkdkifs_mtx);
free(V_pfi_buffer, PFI_MTYPE);
}
void
pfi_cleanup(void)
{
EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
}
struct pfi_kif *
pfi_kif_find(const char *kif_name)
{
struct pfi_kif_cmp s;
PF_RULES_ASSERT();
bzero(&s, sizeof(s));
strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s));
}
struct pfi_kif *
pfi_kif_attach(struct pfi_kif *kif, const char *kif_name)
{
struct pfi_kif *kif1;
PF_RULES_WASSERT();
KASSERT(kif != NULL, ("%s: null kif", __func__));
kif1 = pfi_kif_find(kif_name);
if (kif1 != NULL) {
free(kif, PFI_MTYPE);
return (kif1);
}
bzero(kif, sizeof(*kif));
strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
/*
* It seems that the value of time_second is in unintialzied state
* when pf sets interface statistics clear time in boot phase if pf
* was statically linked to kernel. Instead of setting the bogus
* time value have pfi_get_ifaces handle this case. In
* pfi_get_ifaces it uses time_second if it sees the time is 0.
*/
kif->pfik_tzero = time_second > 1 ? time_second : 0;
TAILQ_INIT(&kif->pfik_dynaddrs);
RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif);
return (kif);
}
void
pfi_kif_ref(struct pfi_kif *kif)
{
PF_RULES_WASSERT();
kif->pfik_rulerefs++;
}
void
pfi_kif_unref(struct pfi_kif *kif)
{
PF_RULES_WASSERT();
KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif));
kif->pfik_rulerefs--;
if (kif->pfik_rulerefs > 0)
return;
/* kif referencing an existing ifnet or group should exist. */
if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
return;
RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
kif->pfik_flags |= PFI_IFLAG_REFS;
mtx_lock(&pfi_unlnkdkifs_mtx);
LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list);
mtx_unlock(&pfi_unlnkdkifs_mtx);
}
void
pfi_kif_purge(void)
{
struct pfi_kif *kif, *kif1;
/*
* Do naive mark-and-sweep garbage collecting of old kifs.
* Reference flag is raised by pf_purge_expired_states().
*/
mtx_lock(&pfi_unlnkdkifs_mtx);
LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) {
if (!(kif->pfik_flags & PFI_IFLAG_REFS)) {
LIST_REMOVE(kif, pfik_list);
free(kif, PFI_MTYPE);
} else
kif->pfik_flags &= ~PFI_IFLAG_REFS;
}
mtx_unlock(&pfi_unlnkdkifs_mtx);
}
int
pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
{
struct ifg_list *p;
if (rule_kif == NULL || rule_kif == packet_kif)
return (1);
if (rule_kif->pfik_group != NULL)
/* XXXGL: locking? */
TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
if (p->ifgl_group == rule_kif->pfik_group)
return (1);
return (0);
}
static void
pfi_attach_ifnet(struct ifnet *ifp)
{
struct pfi_kif *kif;
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
PF_RULES_WLOCK();
V_pfi_update++;
kif = pfi_kif_attach(kif, ifp->if_xname);
kif->pfik_ifp = ifp;
ifp->if_pf_kif = kif;
pfi_kif_update(kif);
PF_RULES_WUNLOCK();
}
static void
pfi_attach_ifgroup(struct ifg_group *ifg)
{
struct pfi_kif *kif;
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
PF_RULES_WLOCK();
V_pfi_update++;
kif = pfi_kif_attach(kif, ifg->ifg_group);
kif->pfik_group = ifg;
ifg->ifg_pf_kif = kif;
PF_RULES_WUNLOCK();
}
int
pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
switch (dyn->pfid_acnt4) {
case 0:
return (0);
case 1:
return (PF_MATCHA(0, &dyn->pfid_addr4,
&dyn->pfid_mask4, a, AF_INET));
default:
return (pfr_match_addr(dyn->pfid_kt, a, AF_INET));
}
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
switch (dyn->pfid_acnt6) {
case 0:
return (0);
case 1:
return (PF_MATCHA(0, &dyn->pfid_addr6,
&dyn->pfid_mask6, a, AF_INET6));
default:
return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6));
}
break;
#endif /* INET6 */
default:
return (0);
}
}
int
pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
{
struct pfi_dynaddr *dyn;
char tblname[PF_TABLE_NAME_SIZE];
struct pf_ruleset *ruleset = NULL;
struct pfi_kif *kif;
int rv = 0;
PF_RULES_WASSERT();
KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u",
__func__, aw->type));
KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn));
if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
return (ENOMEM);
if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) {
free(dyn, PFI_MTYPE);
return (ENOMEM);
}
if (!strcmp(aw->v.ifname, "self"))
dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL);
else
dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname);
pfi_kif_ref(dyn->pfid_kif);
dyn->pfid_net = pfi_unmask(&aw->v.a.mask);
if (af == AF_INET && dyn->pfid_net == 32)
dyn->pfid_net = 128;
strlcpy(tblname, aw->v.ifname, sizeof(tblname));
if (aw->iflags & PFI_AFLAG_NETWORK)
strlcat(tblname, ":network", sizeof(tblname));
if (aw->iflags & PFI_AFLAG_BROADCAST)
strlcat(tblname, ":broadcast", sizeof(tblname));
if (aw->iflags & PFI_AFLAG_PEER)
strlcat(tblname, ":peer", sizeof(tblname));
if (aw->iflags & PFI_AFLAG_NOALIAS)
strlcat(tblname, ":0", sizeof(tblname));
if (dyn->pfid_net != 128)
snprintf(tblname + strlen(tblname),
sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net);
if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) {
rv = ENOMEM;
goto _bad;
}
if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) {
rv = ENOMEM;
goto _bad;
}
dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE;
dyn->pfid_iflags = aw->iflags;
dyn->pfid_af = af;
TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
aw->p.dyn = dyn;
pfi_kif_update(dyn->pfid_kif);
return (0);
_bad:
if (dyn->pfid_kt != NULL)
pfr_detach_table(dyn->pfid_kt);
if (ruleset != NULL)
pf_remove_if_empty_ruleset(ruleset);
if (dyn->pfid_kif != NULL)
pfi_kif_unref(dyn->pfid_kif);
free(dyn, PFI_MTYPE);
return (rv);
}
static void
pfi_kif_update(struct pfi_kif *kif)
{
struct ifg_list *ifgl;
struct pfi_dynaddr *p;
PF_RULES_WASSERT();
/* update all dynaddr */
TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry)
pfi_dynaddr_update(p);
/* again for all groups kif is member of */
if (kif->pfik_ifp != NULL) {
IF_ADDR_RLOCK(kif->pfik_ifp);
TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
pfi_kif_update((struct pfi_kif *)
ifgl->ifgl_group->ifg_pf_kif);
IF_ADDR_RUNLOCK(kif->pfik_ifp);
}
}
static void
pfi_dynaddr_update(struct pfi_dynaddr *dyn)
{
struct pfi_kif *kif;
struct pfr_ktable *kt;
PF_RULES_WASSERT();
KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt,
("%s: bad argument", __func__));
kif = dyn->pfid_kif;
kt = dyn->pfid_kt;
if (kt->pfrkt_larg != V_pfi_update) {
/* this table needs to be brought up-to-date */
pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags);
kt->pfrkt_larg = V_pfi_update;
}
pfr_dynaddr_update(kt, dyn);
}
static void
pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
{
int e, size2 = 0;
struct ifg_member *ifgm;
V_pfi_buffer_cnt = 0;
if (kif->pfik_ifp != NULL)
pfi_instance_add(kif->pfik_ifp, net, flags);
else if (kif->pfik_group != NULL) {
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
pfi_instance_add(ifgm->ifgm_ifp, net, flags);
IFNET_RUNLOCK_NOSLEEP();
}
if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
printf("%s: cannot set %d new addresses into table %s: %d\n",
__func__, V_pfi_buffer_cnt, kt->pfrkt_name, e);
}
static void
pfi_instance_add(struct ifnet *ifp, int net, int flags)
{
struct ifaddr *ia;
int got4 = 0, got6 = 0;
int net2, af;
IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
if (ia->ifa_addr == NULL)
continue;
af = ia->ifa_addr->sa_family;
if (af != AF_INET && af != AF_INET6)
continue;
/*
* XXX: For point-to-point interfaces, (ifname:0) and IPv4,
* jump over addresses without a proper route to work
* around a problem with ppp not fully removing the
* address used during IPCP.
*/
if ((ifp->if_flags & IFF_POINTOPOINT) &&
!(ia->ifa_flags & IFA_ROUTE) &&
(flags & PFI_AFLAG_NOALIAS) && (af == AF_INET))
continue;
if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
continue;
if ((flags & PFI_AFLAG_BROADCAST) &&
!(ifp->if_flags & IFF_BROADCAST))
continue;
if ((flags & PFI_AFLAG_PEER) &&
!(ifp->if_flags & IFF_POINTOPOINT))
continue;
if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
IN6_IS_ADDR_LINKLOCAL(
&((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
continue;
if (flags & PFI_AFLAG_NOALIAS) {
if (af == AF_INET && got4)
continue;
if (af == AF_INET6 && got6)
continue;
}
if (af == AF_INET)
got4 = 1;
else if (af == AF_INET6)
got6 = 1;
net2 = net;
if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
if (af == AF_INET)
net2 = pfi_unmask(&((struct sockaddr_in *)
ia->ifa_netmask)->sin_addr);
else if (af == AF_INET6)
net2 = pfi_unmask(&((struct sockaddr_in6 *)
ia->ifa_netmask)->sin6_addr);
}
if (af == AF_INET && net2 > 32)
net2 = 32;
if (flags & PFI_AFLAG_BROADCAST)
pfi_address_add(ia->ifa_broadaddr, af, net2);
else if (flags & PFI_AFLAG_PEER)
pfi_address_add(ia->ifa_dstaddr, af, net2);
else
pfi_address_add(ia->ifa_addr, af, net2);
}
IF_ADDR_RUNLOCK(ifp);
}
static void
pfi_address_add(struct sockaddr *sa, int af, int net)
{
struct pfr_addr *p;
int i;
if (V_pfi_buffer_cnt >= V_pfi_buffer_max) {
int new_max = V_pfi_buffer_max * 2;
if (new_max > PFI_BUFFER_MAX) {
printf("%s: address buffer full (%d/%d)\n", __func__,
V_pfi_buffer_cnt, PFI_BUFFER_MAX);
return;
}
p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE,
M_NOWAIT);
if (p == NULL) {
printf("%s: no memory to grow buffer (%d/%d)\n",
__func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX);
return;
}
memcpy(p, V_pfi_buffer, V_pfi_buffer_max * sizeof(*V_pfi_buffer));
/* no need to zero buffer */
free(V_pfi_buffer, PFI_MTYPE);
V_pfi_buffer = p;
V_pfi_buffer_max = new_max;
}
if (af == AF_INET && net > 32)
net = 128;
p = V_pfi_buffer + V_pfi_buffer_cnt++;
bzero(p, sizeof(*p));
p->pfra_af = af;
p->pfra_net = net;
if (af == AF_INET)
p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
else if (af == AF_INET6) {
p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
p->pfra_ip6addr.s6_addr16[1] = 0;
}
/* mask network address bits */
if (net < 128)
((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8));
for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++)
((caddr_t)p)[i] = 0;
}
void
pfi_dynaddr_remove(struct pfi_dynaddr *dyn)
{
KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__));
KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__));
TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
pfi_kif_unref(dyn->pfid_kif);
pfr_detach_table(dyn->pfid_kt);
free(dyn, PFI_MTYPE);
}
void
pfi_dynaddr_copyout(struct pf_addr_wrap *aw)
{
KASSERT(aw->type == PF_ADDR_DYNIFTL,
("%s: type %u", __func__, aw->type));
if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL)
return;
aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6;
}
static int
pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
{
return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ));
}
void
pfi_update_status(const char *name, struct pf_status *pfs)
{
struct pfi_kif *p;
struct pfi_kif_cmp key;
struct ifg_member p_member, *ifgm;
TAILQ_HEAD(, ifg_member) ifg_members;
int i, j, k;
strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key);
if (p == NULL)
return;
if (p->pfik_group != NULL) {
bcopy(&p->pfik_group->ifg_members, &ifg_members,
sizeof(ifg_members));
} else {
/* build a temporary list for p only */
bzero(&p_member, sizeof(p_member));
p_member.ifgm_ifp = p->pfik_ifp;
TAILQ_INIT(&ifg_members);
TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next);
}
if (pfs) {
bzero(pfs->pcounters, sizeof(pfs->pcounters));
bzero(pfs->bcounters, sizeof(pfs->bcounters));
}
TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) {
if (ifgm->ifgm_ifp == NULL || ifgm->ifgm_ifp->if_pf_kif == NULL)
continue;
p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif;
/* just clear statistics */
if (pfs == NULL) {
bzero(p->pfik_packets, sizeof(p->pfik_packets));
bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
p->pfik_tzero = time_second;
continue;
}
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++) {
pfs->pcounters[i][j][k] +=
p->pfik_packets[i][j][k];
pfs->bcounters[i][j] +=
p->pfik_bytes[i][j][k];
}
}
}
void
pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
{
struct pfi_kif *p, *nextp;
int n = 0;
for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) {
nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
if (pfi_skip_if(name, p))
continue;
if (*size <= n++)
break;
if (!p->pfik_tzero)
p->pfik_tzero = time_second;
bcopy(p, buf++, sizeof(*buf));
nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
}
*size = n;
}
static int
pfi_skip_if(const char *filter, struct pfi_kif *p)
{
int n;
if (filter == NULL || !*filter)
return (0);
if (!strcmp(p->pfik_name, filter))
return (0); /* exact match */
n = strlen(filter);
if (n < 1 || n >= IFNAMSIZ)
return (1); /* sanity check */
if (filter[n-1] >= '0' && filter[n-1] <= '9')
return (1); /* only do exact match in that case */
if (strncmp(p->pfik_name, filter, n))
return (1); /* prefix doesn't match */
return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9');
}
int
pfi_set_flags(const char *name, int flags)
{
struct pfi_kif *p;
RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
if (pfi_skip_if(name, p))
continue;
p->pfik_flags |= flags;
}
return (0);
}
int
pfi_clear_flags(const char *name, int flags)
{
struct pfi_kif *p;
RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
if (pfi_skip_if(name, p))
continue;
p->pfik_flags &= ~flags;
}
return (0);
}
/* from pf_print_state.c */
static int
pfi_unmask(void *addr)
{
struct pf_addr *m = addr;
int i = 31, j = 0, b = 0;
u_int32_t tmp;
while (j < 4 && m->addr32[j] == 0xffffffff) {
b += 32;
j++;
}
if (j < 4) {
tmp = ntohl(m->addr32[j]);
for (i = 31; tmp & (1 << i); --i)
b++;
}
return (b);
}
static void
pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
pfi_attach_ifnet(ifp);
#ifdef ALTQ
PF_RULES_WLOCK();
pf_altq_ifnet_event(ifp, 0);
PF_RULES_WUNLOCK();
#endif
}
static void
pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif;
if (kif == NULL)
return;
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
PF_RULES_WLOCK();
V_pfi_update++;
pfi_kif_update(kif);
kif->pfik_ifp = NULL;
ifp->if_pf_kif = NULL;
#ifdef ALTQ
pf_altq_ifnet_event(ifp, 1);
#endif
PF_RULES_WUNLOCK();
}
static void
pfi_attach_group_event(void *arg __unused, struct ifg_group *ifg)
{
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
pfi_attach_ifgroup(ifg);
}
static void
pfi_change_group_event(void *arg __unused, char *gname)
{
struct pfi_kif *kif;
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
PF_RULES_WLOCK();
V_pfi_update++;
kif = pfi_kif_attach(kif, gname);
pfi_kif_update(kif);
PF_RULES_WUNLOCK();
}
static void
pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg)
{
struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif;
if (kif == NULL)
return;
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
PF_RULES_WLOCK();
V_pfi_update++;
kif->pfik_group = NULL;
ifg->ifg_pf_kif = NULL;
PF_RULES_WUNLOCK();
}
static void
pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{
if (ifp->if_pf_kif == NULL)
return;
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
return;
}
PF_RULES_WLOCK();
if (ifp && ifp->if_pf_kif) {
V_pfi_update++;
pfi_kif_update(ifp->if_pf_kif);
}
PF_RULES_WUNLOCK();
}