Update pf(4) and pflog(4) to survive basic VNET testing, which includes
proper virtualisation, teardown, avoiding use-after-free and race conditions,
no longer creating a thread per VNET (which could easily be a couple of
thousand threads), gracefully ignoring global events (e.g., eventhandlers)
on teardown, and clearing various globally cached pointers and checking
them before use.

Reviewed by:		kp
Approved by:		re (gjb)
Sponsored by:		The FreeBSD Foundation
MFC after:		2 weeks
Differential Revision:	https://reviews.freebsd.org/D6924
This commit is contained in:
Bjoern A. Zeeb 2016-06-23 21:34:38 +00:00
parent 42644eb88e
commit a0429b5459
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=302156
5 changed files with 254 additions and 117 deletions

View File

@ -835,7 +835,6 @@ typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
struct pf_ruleset *, struct pf_pdesc *, int);
extern pflog_packet_t *pflog_packet_ptr;
#define V_pf_end_threads VNET(pf_end_threads)
#endif /* _KERNEL */
#define PFSYNC_FLAG_SRCNODE 0x04
@ -1520,6 +1519,7 @@ VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
extern void pf_purge_thread(void *);
extern void pf_unload_vnet_purge(void);
extern void pf_intr(void *);
extern void pf_purge_expired_src_nodes(void);
@ -1661,7 +1661,9 @@ VNET_DECLARE(struct pfi_kif *, pfi_all);
#define V_pfi_all VNET(pfi_all)
void pfi_initialize(void);
void pfi_initialize_vnet(void);
void pfi_cleanup(void);
void pfi_cleanup_vnet(void);
void pfi_kif_ref(struct pfi_kif *);
void pfi_kif_unref(struct pfi_kif *);
struct pfi_kif *pfi_kif_find(const char *);

View File

@ -91,19 +91,22 @@ static int pflogioctl(struct ifnet *, u_long, caddr_t);
static void pflogstart(struct ifnet *);
static int pflog_clone_create(struct if_clone *, int, caddr_t);
static void pflog_clone_destroy(struct ifnet *);
static struct if_clone *pflog_cloner;
static const char pflogname[] = "pflog";
struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */
static VNET_DEFINE(struct if_clone *, pflog_cloner);
#define V_pflog_cloner VNET(pflog_cloner)
VNET_DEFINE(struct ifnet *, pflogifs[PFLOGIFS_MAX]); /* for fast access */
#define V_pflogifs VNET(pflogifs)
static void
pflogattach(int npflog)
pflogattach(int npflog __unused)
{
int i;
for (i = 0; i < PFLOGIFS_MAX; i++)
pflogifs[i] = NULL;
pflog_cloner = if_clone_simple(pflogname, pflog_clone_create,
V_pflogifs[i] = NULL;
V_pflog_cloner = if_clone_simple(pflogname, pflog_clone_create,
pflog_clone_destroy, 1);
}
@ -130,7 +133,7 @@ pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param)
bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
pflogifs[unit] = ifp;
V_pflogifs[unit] = ifp;
return (0);
}
@ -141,8 +144,8 @@ pflog_clone_destroy(struct ifnet *ifp)
int i;
for (i = 0; i < PFLOGIFS_MAX; i++)
if (pflogifs[i] == ifp)
pflogifs[i] = NULL;
if (V_pflogifs[i] == ifp)
V_pflogifs[i] = NULL;
bpfdetach(ifp);
if_detach(ifp);
@ -206,7 +209,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
if (kif == NULL || m == NULL || rm == NULL || pd == NULL)
return ( 1);
if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
if ((ifn = V_pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
return (0);
bzero(&hdr, sizeof(hdr));
@ -259,6 +262,24 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
return (0);
}
/*
 * VNET initialisation hook for pflog: calls pflogattach(), which clears
 * the V_pflogifs[] slots and installs the cloner in V_pflog_cloner.
 * Registered through VNET_SYSINIT at SI_SUB_PSEUDO / SI_ORDER_ANY.
 */
static void
vnet_pflog_init(const void *unused __unused)
{
/* pflogattach() declares its argument __unused; the value is not used. */
pflogattach(1);
}
VNET_SYSINIT(vnet_pflog_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_pflog_init, NULL);
/*
 * VNET teardown hook for pflog: detaches the cloner stored in
 * V_pflog_cloner by pflogattach().
 *
 * NOTE(review): this is registered at SI_SUB_INIT_IF / SI_ORDER_SECOND,
 * whereas the matching init hook uses SI_SUB_PSEUDO / SI_ORDER_ANY.
 * Presumably this is deliberate, to order the detach relative to interface
 * teardown -- confirm.
 */
static void
vnet_pflog_uninit(const void *unused __unused)
{
if_clone_detach(V_pflog_cloner);
}
VNET_SYSUNINIT(vnet_pflog_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
vnet_pflog_uninit, NULL);
static int
pflog_modevent(module_t mod, int type, void *data)
{
@ -266,7 +287,6 @@ pflog_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
pflogattach(1);
PF_RULES_WLOCK();
pflog_packet_ptr = pflog_packet;
PF_RULES_WUNLOCK();
@ -275,10 +295,9 @@ pflog_modevent(module_t mod, int type, void *data)
PF_RULES_WLOCK();
pflog_packet_ptr = NULL;
PF_RULES_WUNLOCK();
if_clone_detach(pflog_cloner);
break;
default:
error = EINVAL;
error = EOPNOTSUPP;
break;
}

View File

@ -299,7 +299,7 @@ static void pf_route6(struct mbuf **, struct pf_rule *, int,
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
VNET_DECLARE(int, pf_end_threads);
extern int pf_end_threads;
VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
@ -1421,51 +1421,25 @@ pf_intr(void *v)
}
void
pf_purge_thread(void *v)
pf_purge_thread(void *unused __unused)
{
VNET_ITERATOR_DECL(vnet_iter);
u_int idx = 0;
CURVNET_SET((struct vnet *)v);
for (;;) {
PF_RULES_RLOCK();
rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
PF_RULES_RUNLOCK();
if (V_pf_end_threads) {
/*
* To cleanse up all kifs and rules we need
* two runs: first one clears reference flags,
* then pf_purge_expired_states() doesn't
* raise them, and then second run frees.
*/
PF_RULES_RUNLOCK();
pf_purge_unlinked_rules();
pfi_kif_purge();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
/*
* Now purge everything.
*/
pf_purge_expired_states(0, pf_hashmask);
pf_purge_expired_fragments();
pf_purge_expired_src_nodes();
/*
* Now all kifs & rules should be unreferenced,
* thus should be successfully freed.
*/
pf_purge_unlinked_rules();
pfi_kif_purge();
/*
* Announce success and exit.
*/
PF_RULES_RLOCK();
V_pf_end_threads++;
PF_RULES_RUNLOCK();
if (pf_end_threads) {
pf_end_threads++;
wakeup(pf_purge_thread);
kproc_exit(0);
}
PF_RULES_RUNLOCK();
/* Process 1/interval fraction of the state table every run. */
idx = pf_purge_expired_states(idx, pf_hashmask /
@ -1483,11 +1457,42 @@ pf_purge_thread(void *v)
pf_purge_unlinked_rules();
pfi_kif_purge();
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
}
/* not reached */
CURVNET_RESTORE();
}
/*
 * Unload-time purge: runs the unlinked-rule and kif purges, then purges
 * expired states, fragments and source nodes, then repeats the rule and
 * kif purges.  Takes no arguments and returns nothing.
 *
 * Called from pf_unload_vnet() before that function takes the rules
 * write lock.
 */
void
pf_unload_vnet_purge(void)
{
/*
 * To clean up all kifs and rules we need
 * two runs: first one clears reference flags,
 * then pf_purge_expired_states() doesn't
 * raise them, and then second run frees.
 */
pf_purge_unlinked_rules();
pfi_kif_purge();
/*
 * Now purge everything.
 */
/* Start at index 0 and pass pf_hashmask as the end/limit argument. */
pf_purge_expired_states(0, pf_hashmask);
pf_purge_expired_fragments();
pf_purge_expired_src_nodes();
/*
 * Now all kifs & rules should be unreferenced,
 * thus should be successfully freed.
 */
pf_purge_unlinked_rules();
pfi_kif_purge();
}
u_int32_t
pf_state_expires(const struct pf_state *state)
{

View File

@ -58,6 +58,9 @@ static VNET_DEFINE(long, pfi_update);
#define V_pfi_update VNET(pfi_update)
#define PFI_BUFFER_MAX 0x10000
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
static VNET_DEFINE(int, pfi_buffer_cnt);
static VNET_DEFINE(int, pfi_buffer_max);
@ -108,7 +111,7 @@ MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
MTX_DEF);
void
pfi_initialize(void)
pfi_initialize_vnet(void)
{
struct ifg_group *ifg;
struct ifnet *ifp;
@ -129,6 +132,11 @@ pfi_initialize(void)
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
pfi_attach_ifnet(ifp);
IFNET_RUNLOCK();
}
void
pfi_initialize(void)
{
pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
@ -144,10 +152,36 @@ pfi_initialize(void)
pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
}
/*
 * Release the per-VNET interface (kif) bookkeeping: every entry in the
 * V_pfi_ifs tree, every entry on V_pfi_unlinked_kifs, and V_pfi_buffer.
 *
 * Begins with PF_RULES_WASSERT(); the caller pf_unload_vnet() wraps the
 * call in PF_RULES_WLOCK()/PF_RULES_WUNLOCK().
 */
void
pfi_cleanup_vnet(void)
{
struct pfi_kif *kif;
PF_RULES_WASSERT();
V_pfi_all = NULL;
/* Drain the tree one minimum element at a time. */
while ((kif = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
/*
 * Clear the back-pointers held by the interface group and the
 * ifnet before freeing, so they do not refer to freed memory.
 */
if (kif->pfik_group)
kif->pfik_group->ifg_pf_kif = NULL;
if (kif->pfik_ifp)
kif->pfik_ifp->if_pf_kif = NULL;
free(kif, PFI_MTYPE);
}
/* The unlinked list is drained while holding pfi_unlnkdkifs_mtx. */
mtx_lock(&pfi_unlnkdkifs_mtx);
while ((kif = LIST_FIRST(&V_pfi_unlinked_kifs))) {
LIST_REMOVE(kif, pfik_list);
free(kif, PFI_MTYPE);
}
mtx_unlock(&pfi_unlnkdkifs_mtx);
/* NOTE(review): V_pfi_buffer is freed here but not reset to NULL. */
free(V_pfi_buffer, PFI_MTYPE);
}
void
pfi_cleanup(void)
{
struct pfi_kif *p;
EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
@ -155,19 +189,6 @@ pfi_cleanup(void)
EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
V_pfi_all = NULL;
while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p);
free(p, PFI_MTYPE);
}
while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) {
LIST_REMOVE(p, pfik_list);
free(p, PFI_MTYPE);
}
free(V_pfi_buffer, PFI_MTYPE);
}
struct pfi_kif *
@ -668,7 +689,7 @@ pfi_update_status(const char *name, struct pf_status *pfs)
bzero(pfs->bcounters, sizeof(pfs->bcounters));
}
TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) {
if (ifgm->ifgm_ifp == NULL)
if (ifgm->ifgm_ifp == NULL || ifgm->ifgm_ifp->if_pf_kif == NULL)
continue;
p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif;
@ -780,6 +801,11 @@ pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
CURVNET_SET(ifp->if_vnet);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
pfi_attach_ifnet(ifp);
#ifdef ALTQ
PF_RULES_WLOCK();
@ -794,7 +820,15 @@ pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif;
if (kif == NULL)
return;
CURVNET_SET(ifp->if_vnet);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
PF_RULES_WLOCK();
V_pfi_update++;
pfi_kif_update(kif);
@ -813,6 +847,11 @@ pfi_attach_group_event(void *arg , struct ifg_group *ifg)
{
CURVNET_SET((struct vnet *)arg);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
pfi_attach_ifgroup(ifg);
CURVNET_RESTORE();
}
@ -822,9 +861,14 @@ pfi_change_group_event(void *arg, char *gname)
{
struct pfi_kif *kif;
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
CURVNET_SET((struct vnet *)arg);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
PF_RULES_WLOCK();
V_pfi_update++;
kif = pfi_kif_attach(kif, gname);
@ -838,7 +882,15 @@ pfi_detach_group_event(void *arg, struct ifg_group *ifg)
{
struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif;
if (kif == NULL)
return;
CURVNET_SET((struct vnet *)arg);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
PF_RULES_WLOCK();
V_pfi_update++;
@ -851,8 +903,15 @@ pfi_detach_group_event(void *arg, struct ifg_group *ifg)
static void
pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{
if (ifp->if_pf_kif == NULL)
return;
CURVNET_SET(ifp->if_vnet);
if (V_pf_vnet_active == 0) {
/* Avoid teardown race in the least expensive way. */
CURVNET_RESTORE();
return;
}
PF_RULES_WLOCK();
if (ifp && ifp->if_pf_kif) {
V_pfi_update++;

View File

@ -87,7 +87,6 @@ __FBSDID("$FreeBSD$");
#include <net/altq/altq.h>
#endif
static int pfattach(void);
static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t,
u_int8_t, u_int8_t, u_int8_t);
@ -189,7 +188,16 @@ static struct cdevsw pf_cdevsw = {
static volatile VNET_DEFINE(int, pf_pfil_hooked);
#define V_pf_pfil_hooked VNET(pf_pfil_hooked)
VNET_DEFINE(int, pf_end_threads);
/*
* We need a flag that is neither hooked nor running to know when
* the VNET is "valid". We primarily need this to control (global)
* external event, e.g., eventhandlers.
*/
VNET_DEFINE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
int pf_end_threads;
struct rwlock pf_rules_lock;
struct sx pf_ioctl_lock;
@ -204,17 +212,14 @@ pfsync_defer_t *pfsync_defer_ptr = NULL;
/* pflog */
pflog_packet_t *pflog_packet_ptr = NULL;
static int
pfattach(void)
static void
pfattach_vnet(void)
{
u_int32_t *my_timeout = V_pf_default_rule.timeout;
int error;
if (IS_DEFAULT_VNET(curvnet))
pf_mtag_initialize();
pf_initialize();
pfr_initialize();
pfi_initialize();
pfi_initialize_vnet();
pf_normalize_init();
V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
@ -276,18 +281,13 @@ pfattach(void)
for (int i = 0; i < SCNT_MAX; i++)
V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
if ((error = kproc_create(pf_purge_thread, curvnet, NULL, 0, 0,
"pf purge")) != 0)
if (swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET,
INTR_MPSAFE, &V_pf_swi_cookie) != 0)
/* XXXGL: leaked all above. */
return (error);
if ((error = swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET,
INTR_MPSAFE, &V_pf_swi_cookie)) != 0)
/* XXXGL: leaked all above. */
return (error);
return (0);
return;
}
static struct pf_pool *
pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
@ -3480,21 +3480,6 @@ shutdown_pf(void)
u_int32_t t[5];
char nn = '\0';
V_pf_status.running = 0;
counter_u64_free(V_pf_default_rule.states_cur);
counter_u64_free(V_pf_default_rule.states_tot);
counter_u64_free(V_pf_default_rule.src_nodes);
for (int i = 0; i < PFRES_MAX; i++)
counter_u64_free(V_pf_status.counters[i]);
for (int i = 0; i < LCNT_MAX; i++)
counter_u64_free(V_pf_status.lcounters[i]);
for (int i = 0; i < FCNT_MAX; i++)
counter_u64_free(V_pf_status.fcounters[i]);
for (int i = 0; i < SCNT_MAX; i++)
counter_u64_free(V_pf_status.scounters[i]);
do {
if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
!= 0) {
@ -3546,6 +3531,20 @@ shutdown_pf(void)
/* status does not use malloced mem so no need to cleanup */
/* fingerprints and interfaces have their own cleanup code */
/* Free counters last as we updated them during shutdown. */
counter_u64_free(V_pf_default_rule.states_cur);
counter_u64_free(V_pf_default_rule.states_tot);
counter_u64_free(V_pf_default_rule.src_nodes);
for (int i = 0; i < PFRES_MAX; i++)
counter_u64_free(V_pf_status.counters[i]);
for (int i = 0; i < LCNT_MAX; i++)
counter_u64_free(V_pf_status.lcounters[i]);
for (int i = 0; i < FCNT_MAX; i++)
counter_u64_free(V_pf_status.fcounters[i]);
for (int i = 0; i < SCNT_MAX; i++)
counter_u64_free(V_pf_status.scounters[i]);
} while(0);
return (error);
@ -3697,39 +3696,55 @@ dehook_pf(void)
return (0);
}
static int
pf_load(void)
static void
pf_load_vnet(void)
{
int error;
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
V_pf_pfil_hooked = 0;
V_pf_end_threads = 0;
TAILQ_INIT(&V_pf_tags);
TAILQ_INIT(&V_pf_qids);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
pfattach_vnet();
V_pf_vnet_active = 1;
}
static int
pf_load(void)
{
int error;
rw_init(&pf_rules_lock, "pf rulesets");
sx_init(&pf_ioctl_lock, "pf ioctl");
pf_mtag_initialize();
pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
if ((error = pfattach()) != 0)
if (pf_dev == NULL)
return (ENOMEM);
pf_end_threads = 0;
error = kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge");
if (error != 0)
return (error);
pfi_initialize();
return (0);
}
static int
pf_unload(void)
static void
pf_unload_vnet()
{
int error = 0;
int error;
V_pf_vnet_active = 0;
V_pf_status.running = 0;
swi_remove(V_pf_swi_cookie);
error = dehook_pf();
@ -3740,30 +3755,67 @@ pf_unload(void)
* a message like 'No such process'.
*/
printf("%s : pfil unregisteration fail\n", __FUNCTION__);
return error;
return;
}
pf_unload_vnet_purge();
PF_RULES_WLOCK();
shutdown_pf();
V_pf_end_threads = 1;
while (V_pf_end_threads < 2) {
wakeup_one(pf_purge_thread);
rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0);
}
PF_RULES_WUNLOCK();
pf_normalize_cleanup();
pfi_cleanup();
PF_RULES_WLOCK();
pfi_cleanup_vnet();
PF_RULES_WUNLOCK();
pfr_cleanup();
pf_osfp_flush();
pf_cleanup();
if (IS_DEFAULT_VNET(curvnet))
pf_mtag_cleanup();
destroy_dev(pf_dev);
}
/*
 * Global (non-VNET) unload: asks the purge thread to exit and waits for
 * its acknowledgement, destroys the pf device node if one was created,
 * runs pfi_cleanup(), and destroys the global locks.
 *
 * Returns error, which is initialised to 0 and never modified here.
 */
static int
pf_unload(void)
{
int error = 0;
/*
 * Handshake with pf_purge_thread(): set the flag to 1, then wait until
 * it reaches 2.  The thread increments pf_end_threads and calls
 * wakeup(pf_purge_thread) before kproc_exit().
 *
 * NOTE(review): rw_sleep() is passed pf_rules_lock, but no acquisition
 * of that lock is visible in this function -- confirm the lock state
 * expected by rw_sleep() here.
 */
pf_end_threads = 1;
while (pf_end_threads < 2) {
wakeup_one(pf_purge_thread);
rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0);
}
/* pf_load() can return early with pf_dev == NULL, hence the check. */
if (pf_dev != NULL)
destroy_dev(pf_dev);
pfi_cleanup();
rw_destroy(&pf_rules_lock);
sx_destroy(&pf_ioctl_lock);
return (error);
}
/*
 * VNET initialisation hook for pf: delegates to pf_load_vnet().
 * Registered through VNET_SYSINIT at SI_SUB_PROTO_FIREWALL /
 * SI_ORDER_THIRD.
 *
 * NOTE(review): the parameter is declared "void *" here, while
 * vnet_pf_uninit() takes "const void *" -- confirm which the
 * VNET_SYSINIT callback type expects.
 */
static void
vnet_pf_init(void *unused __unused)
{
pf_load_vnet();
}
VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
vnet_pf_init, NULL);
/*
 * VNET teardown hook for pf: delegates to pf_unload_vnet().
 * Registered through VNET_SYSUNINIT at SI_SUB_PROTO_FIREWALL /
 * SI_ORDER_THIRD, the same subsystem and order as vnet_pf_init().
 */
static void
vnet_pf_uninit(const void *unused __unused)
{
pf_unload_vnet();
}
VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
vnet_pf_uninit, NULL);
static int
pf_modevent(module_t mod, int type, void *data)
{
@ -3796,5 +3848,5 @@ static moduledata_t pf_mod = {
0
};
DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_FIRST);
DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
MODULE_VERSION(pf, PF_MODVER);