When the connection rate hits the limit and we overload a source into a
table, we are actually editing the table, which means editing rules, and
thus we need writer access to them.
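
For reference, this is roughly what the lock-protected table update ends up
looking like in pf_overload_task() (a condensed excerpt of the diff below):

	/* Editing the overload table edits rule-owned state, hence the
	 * rules writer lock around the insertion. */
	PF_RULES_WLOCK();
	pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
	PF_RULES_WUNLOCK();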

Fix this by offloading the table update to the same taskqueue we
already use for flushing. Since the taskqueue's major task is now
overloading, and flushing is optional, do a mechanical rename
s/flush/overload/ in the code related to the taskqueue.
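
In code terms, the offload is the standard taskqueue(9) pattern; a
condensed sketch using the names from the diff below:

	/* Work item queued from the packet path without the rules lock. */
	struct pf_overload_entry {
		SLIST_ENTRY(pf_overload_entry) next;
		struct pf_addr	addr;
		sa_family_t	af;
		uint8_t		dir;
		struct pf_rule	*rule;
	};

	/* pf_src_connlimit(): record the offending source and kick the task. */
	PF_OVERLOADQ_LOCK();
	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
	PF_OVERLOADQ_UNLOCK();
	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);

	/* pf_overload_task() later drains the queue and performs the table
	 * insertions (and the optional state flush) under PF_RULES_WLOCK(). */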

Since the overloading tasks reference rules unsafely, provide a
bandaid in pf_purge_unlinked_rules(): if the latter sees any
queued tasks, it skips purging for this run.
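
The bandaid is an early return at the top of pf_purge_unlinked_rules()
(as in the hunk below):

	/* A queued overload task may still hold a bare pointer to a rule
	 * that has already been unlinked, so skip this GC run while any
	 * work is pending. */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();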

In table code:
- Assert that any rules lock is held in pfr_lookup_addr().
- Assert that the rules writer lock is held in pfr_route_kentry().
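
Both assertions are one-liners at the top of the respective functions
(see the pf_table.c hunks at the end of the diff):

	/* pfr_lookup_addr() only reads the table: any rules lock will do. */
	PF_RULES_ASSERT();

	/* pfr_route_kentry() modifies the radix tree: writer lock required. */
	PF_RULES_WASSERT();
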
Gleb Smirnoff 2012-09-22 10:14:47 +00:00
parent e706fd3a3a
commit 29bdd62c85
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=240811
2 changed files with 109 additions and 81 deletions


@@ -163,25 +163,25 @@ static struct mtx pf_sendqueue_mtx;
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
/*
* Queue for pf_flush_task() tasks.
* Queue for pf_overload_task() tasks.
*/
struct pf_flush_entry {
SLIST_ENTRY(pf_flush_entry) next;
struct pf_overload_entry {
SLIST_ENTRY(pf_overload_entry) next;
struct pf_addr addr;
sa_family_t af;
uint8_t dir;
struct pf_rule *rule; /* never dereferenced */
struct pf_rule *rule;
};
SLIST_HEAD(pf_flush_head, pf_flush_entry);
static VNET_DEFINE(struct pf_flush_head, pf_flushqueue);
#define V_pf_flushqueue VNET(pf_flushqueue)
static VNET_DEFINE(struct task, pf_flushtask);
#define V_pf_flushtask VNET(pf_flushtask)
SLIST_HEAD(pf_overload_head, pf_overload_entry);
static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue VNET(pf_overloadqueue)
static VNET_DEFINE(struct task, pf_overloadtask);
#define V_pf_overloadtask VNET(pf_overloadtask)
static struct mtx pf_flushqueue_mtx;
#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx)
#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx)
static struct mtx pf_overloadqueue_mtx;
#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
struct mtx pf_unlnkdrules_mtx;
@@ -279,7 +279,7 @@ static int pf_addr_wrap_neq(struct pf_addr_wrap *,
static struct pf_state *pf_find_state(struct pfi_kif *,
struct pf_state_key_cmp *, u_int);
static int pf_src_connlimit(struct pf_state **);
static void pf_flush_task(void *c, int pending);
static void pf_overload_task(void *c, int pending);
static int pf_insert_src_node(struct pf_src_node **,
struct pf_rule *, struct pf_addr *, sa_family_t);
static int pf_purge_expired_states(int);
@@ -461,8 +461,7 @@ pf_check_threshold(struct pf_threshold *threshold)
static int
pf_src_connlimit(struct pf_state **state)
{
struct pfr_addr p;
struct pf_flush_entry *pffe;
struct pf_overload_entry *pfoe;
int bad = 0;
PF_STATE_LOCK_ASSERT(*state);
@@ -494,69 +493,79 @@ pf_src_connlimit(struct pf_state **state)
if ((*state)->rule.ptr->overload_tbl == NULL)
return (1);
V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("%s: blocking address ", __func__);
pf_print_host(&(*state)->src_node->addr, 0,
(*state)->key[PF_SK_WIRE]->af);
printf("\n");
}
bzero(&p, sizeof(p));
p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
switch ((*state)->key[PF_SK_WIRE]->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
p.pfra_ip4addr = (*state)->src_node->addr.v4;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
p.pfra_net = 128;
p.pfra_ip6addr = (*state)->src_node->addr.v6;
break;
#endif /* INET6 */
}
pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second);
if ((*state)->rule.ptr->flush == 0)
return (1);
/* Schedule flushing task. */
pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT);
if (pffe == NULL)
/* Schedule overloading and flushing task. */
pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
if (pfoe == NULL)
return (1); /* too bad :( */
bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr));
pffe->af = (*state)->key[PF_SK_WIRE]->af;
pffe->dir = (*state)->direction;
if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL)
pffe->rule = NULL;
else
pffe->rule = (*state)->rule.ptr;
PF_FLUSHQ_LOCK();
SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next);
PF_FLUSHQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask);
bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
pfoe->af = (*state)->key[PF_SK_WIRE]->af;
pfoe->rule = (*state)->rule.ptr;
pfoe->dir = (*state)->direction;
PF_OVERLOADQ_LOCK();
SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
PF_OVERLOADQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
return (1);
}
static void
pf_flush_task(void *c, int pending)
pf_overload_task(void *c, int pending)
{
struct pf_flush_head queue;
struct pf_flush_entry *pffe, *pffe1;
struct pf_overload_head queue;
struct pfr_addr p;
struct pf_overload_entry *pfoe, *pfoe1;
uint32_t killed = 0;
PF_FLUSHQ_LOCK();
queue = *(struct pf_flush_head *)c;
SLIST_INIT((struct pf_flush_head *)c);
PF_FLUSHQ_UNLOCK();
PF_OVERLOADQ_LOCK();
queue = *(struct pf_overload_head *)c;
SLIST_INIT((struct pf_overload_head *)c);
PF_OVERLOADQ_UNLOCK();
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
bzero(&p, sizeof(p));
SLIST_FOREACH(pfoe, &queue, next) {
V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("%s: blocking address ", __func__);
pf_print_host(&pfoe->addr, 0, pfoe->af);
printf("\n");
}
p.pfra_af = pfoe->af;
switch (pfoe->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
p.pfra_ip4addr = pfoe->addr.v4;
break;
#endif
#ifdef INET6
case AF_INET6:
p.pfra_net = 128;
p.pfra_ip6addr = pfoe->addr.v6;
break;
#endif
}
PF_RULES_WLOCK();
pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
PF_RULES_WUNLOCK();
}
/*
* Remove those entries, that don't need flushing.
*/
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
if (pfoe->rule->flush == 0) {
SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
free(pfoe, M_PFTEMP);
} else
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
/* If nothing to flush, return. */
if (SLIST_EMPTY(&queue))
return;
for (int i = 0; i <= V_pf_hashmask; i++) {
struct pf_idhash *ih = &V_pf_idhash[i];
@@ -566,13 +575,14 @@ pf_flush_task(void *c, int pending)
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry) {
sk = s->key[PF_SK_WIRE];
SLIST_FOREACH(pffe, &queue, next)
if (sk->af == pffe->af && (pffe->rule == NULL ||
pffe->rule == s->rule.ptr) &&
((pffe->dir == PF_OUT &&
PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) ||
(pffe->dir == PF_IN &&
PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) {
SLIST_FOREACH(pfoe, &queue, next)
if (sk->af == pfoe->af &&
((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
pfoe->rule == s->rule.ptr) &&
((pfoe->dir == PF_OUT &&
PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
(pfoe->dir == PF_IN &&
PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
s->timeout = PFTM_PURGE;
s->src.state = s->dst.state = TCPS_CLOSED;
killed++;
@@ -580,8 +590,8 @@ pf_flush_task(void *c, int pending)
}
PF_HASHROW_UNLOCK(ih);
}
SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1)
free(pffe, M_PFTEMP);
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
free(pfoe, M_PFTEMP);
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("%s: %u states killed", __func__, killed);
}
@@ -742,12 +752,13 @@ pf_initialize()
sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL,
UMA_ALIGN_PTR, 0);
/* Send & flush queues. */
/* Send & overload+flush queues. */
STAILQ_INIT(&V_pf_sendqueue);
SLIST_INIT(&V_pf_flushqueue);
TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue);
SLIST_INIT(&V_pf_overloadqueue);
TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF);
mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
MTX_DEF);
/* Unlinked, but may be referenced rules. */
TAILQ_INIT(&V_pf_unlinked_rules);
@@ -788,7 +799,7 @@ pf_cleanup()
}
mtx_destroy(&pf_sendqueue_mtx);
mtx_destroy(&pf_flushqueue_mtx);
mtx_destroy(&pf_overloadqueue_mtx);
mtx_destroy(&pf_unlnkdrules_mtx);
uma_zdestroy(V_pf_mtag_z);
@@ -1578,6 +1589,19 @@ pf_purge_unlinked_rules()
struct pf_rulequeue tmpq;
struct pf_rule *r, *r1;
/*
* If we have overloading task pending, then we'd
* better skip purging this time. There is a tiny
* probability that overloading task references
* an already unlinked rule.
*/
PF_OVERLOADQ_LOCK();
if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
PF_OVERLOADQ_UNLOCK();
return;
}
PF_OVERLOADQ_UNLOCK();
/*
* Do naive mark-and-sweep garbage collecting of old rules.
* Reference flag is raised by pf_purge_expired_states()


@@ -742,6 +742,8 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
struct radix_node_head *head = NULL;
struct pfr_kentry *ke;
PF_RULES_ASSERT();
bzero(&sa, sizeof(sa));
if (ad->pfra_af == AF_INET) {
FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
@@ -929,6 +931,8 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
struct radix_node *rn;
struct radix_node_head *head = NULL;
PF_RULES_WASSERT();
bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
if (ke->pfrke_af == AF_INET)
head = kt->pfrkt_ip4;