When the connection rate hits the limit and we overload a source into a
table, we are actually editing the table, which means editing rules, and
thus we need writer access to them.
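
For reference, this is roughly what the lock-protected table update ends up
looking like in pf_overload_task() (a condensed excerpt of the diff below):

	/* Editing the overload table edits rule-owned state, hence the
	 * rules writer lock around the insertion. */
	PF_RULES_WLOCK();
	pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
	PF_RULES_WUNLOCK();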

Fix this by offloading the table update to the same taskqueue we
already use for flushing. Since the taskqueue's major task is now
overloading, and flushing is optional, do a mechanical rename
s/flush/overload/ in the code related to the taskqueue.
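
In code terms, the offload is the standard taskqueue(9) pattern; a
condensed sketch using the names from the diff below:

	/* Work item queued from the packet path without the rules lock. */
	struct pf_overload_entry {
		SLIST_ENTRY(pf_overload_entry) next;
		struct pf_addr	addr;
		sa_family_t	af;
		uint8_t		dir;
		struct pf_rule	*rule;
	};

	/* pf_src_connlimit(): record the offending source and kick the task. */
	PF_OVERLOADQ_LOCK();
	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
	PF_OVERLOADQ_UNLOCK();
	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);

	/* pf_overload_task() later drains the queue and performs the table
	 * insertions (and the optional state flush) under PF_RULES_WLOCK(). */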

Since the overloading tasks reference rules unsafely, provide a
bandaid in pf_purge_unlinked_rules(): if the latter sees any
queued tasks, it skips purging for this run.
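
The bandaid is an early return at the top of pf_purge_unlinked_rules()
(as in the hunk below):

	/* A queued overload task may still hold a bare pointer to a rule
	 * that has already been unlinked, so skip this GC run while any
	 * work is pending. */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();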

In table code:
- Assert that any rules lock is held in pfr_lookup_addr().
- Assert that the rules writer lock is held in pfr_route_kentry().
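
Both assertions are one-liners at the top of the respective functions
(see the pf_table.c hunks at the end of the diff):

	/* pfr_lookup_addr() only reads the table: any rules lock will do. */
	PF_RULES_ASSERT();

	/* pfr_route_kentry() modifies the radix tree: writer lock required. */
	PF_RULES_WASSERT();
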
Gleb Smirnoff 2012-09-22 10:14:47 +00:00
parent e706fd3a3a
commit 29bdd62c85
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=240811
2 changed files with 109 additions and 81 deletions


@@ -163,25 +163,25 @@ static struct mtx pf_sendqueue_mtx;
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
/*
* Queue for pf_flush_task() tasks.
* Queue for pf_overload_task() tasks.
*/
struct pf_flush_entry {
SLIST_ENTRY(pf_flush_entry) next;
struct pf_overload_entry {
SLIST_ENTRY(pf_overload_entry) next;
struct pf_addr addr;
sa_family_t af;
uint8_t dir;
struct pf_rule *rule; /* never dereferenced */
struct pf_rule *rule;
};
SLIST_HEAD(pf_flush_head, pf_flush_entry);
static VNET_DEFINE(struct pf_flush_head, pf_flushqueue);
#define V_pf_flushqueue VNET(pf_flushqueue)
static VNET_DEFINE(struct task, pf_flushtask);
#define V_pf_flushtask VNET(pf_flushtask)
SLIST_HEAD(pf_overload_head, pf_overload_entry);
static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue VNET(pf_overloadqueue)
static VNET_DEFINE(struct task, pf_overloadtask);
#define V_pf_overloadtask VNET(pf_overloadtask)
static struct mtx pf_flushqueue_mtx;
#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx)
#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx)
static struct mtx pf_overloadqueue_mtx;
#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
struct mtx pf_unlnkdrules_mtx;
@@ -279,7 +279,7 @@ static int pf_addr_wrap_neq(struct pf_addr_wrap *,
static struct pf_state *pf_find_state(struct pfi_kif *,
struct pf_state_key_cmp *, u_int);
static int pf_src_connlimit(struct pf_state **);
static void pf_flush_task(void *c, int pending);
static void pf_overload_task(void *c, int pending);
static int pf_insert_src_node(struct pf_src_node **,
struct pf_rule *, struct pf_addr *, sa_family_t);
static int pf_purge_expired_states(int);
@@ -461,8 +461,7 @@ pf_check_threshold(struct pf_threshold *threshold)
static int
pf_src_connlimit(struct pf_state **state)
{
struct pfr_addr p;
struct pf_flush_entry *pffe;
struct pf_overload_entry *pfoe;
int bad = 0;
PF_STATE_LOCK_ASSERT(*state);
@@ -494,69 +493,79 @@ pf_src_connlimit(struct pf_state **state)
if ((*state)->rule.ptr->overload_tbl == NULL)
return (1);
V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("%s: blocking address ", __func__);
pf_print_host(&(*state)->src_node->addr, 0,
(*state)->key[PF_SK_WIRE]->af);
printf("\n");
}
bzero(&p, sizeof(p));
p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
switch ((*state)->key[PF_SK_WIRE]->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
p.pfra_ip4addr = (*state)->src_node->addr.v4;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
p.pfra_net = 128;
p.pfra_ip6addr = (*state)->src_node->addr.v6;
break;
#endif /* INET6 */
}
pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second);
if ((*state)->rule.ptr->flush == 0)
return (1);
/* Schedule flushing task. */
pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT);
if (pffe == NULL)
/* Schedule overloading and flushing task. */
pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
if (pfoe == NULL)
return (1); /* too bad :( */
bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr));
pffe->af = (*state)->key[PF_SK_WIRE]->af;
pffe->dir = (*state)->direction;
if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL)
pffe->rule = NULL;
else
pffe->rule = (*state)->rule.ptr;
PF_FLUSHQ_LOCK();
SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next);
PF_FLUSHQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask);
bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
pfoe->af = (*state)->key[PF_SK_WIRE]->af;
pfoe->rule = (*state)->rule.ptr;
pfoe->dir = (*state)->direction;
PF_OVERLOADQ_LOCK();
SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
PF_OVERLOADQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
return (1);
}
static void
pf_flush_task(void *c, int pending)
pf_overload_task(void *c, int pending)
{
struct pf_flush_head queue;
struct pf_flush_entry *pffe, *pffe1;
struct pf_overload_head queue;
struct pfr_addr p;
struct pf_overload_entry *pfoe, *pfoe1;
uint32_t killed = 0;
PF_FLUSHQ_LOCK();
queue = *(struct pf_flush_head *)c;
SLIST_INIT((struct pf_flush_head *)c);
PF_FLUSHQ_UNLOCK();
PF_OVERLOADQ_LOCK();
queue = *(struct pf_overload_head *)c;
SLIST_INIT((struct pf_overload_head *)c);
PF_OVERLOADQ_UNLOCK();
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
bzero(&p, sizeof(p));
SLIST_FOREACH(pfoe, &queue, next) {
V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("%s: blocking address ", __func__);
pf_print_host(&pfoe->addr, 0, pfoe->af);
printf("\n");
}
p.pfra_af = pfoe->af;
switch (pfoe->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
p.pfra_ip4addr = pfoe->addr.v4;
break;
#endif
#ifdef INET6
case AF_INET6:
p.pfra_net = 128;
p.pfra_ip6addr = pfoe->addr.v6;
break;
#endif
}
PF_RULES_WLOCK();
pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
PF_RULES_WUNLOCK();
}
/*
* Remove those entries, that don't need flushing.
*/
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
if (pfoe->rule->flush == 0) {
SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
free(pfoe, M_PFTEMP);
} else
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
/* If nothing to flush, return. */
if (SLIST_EMPTY(&queue))
return;
for (int i = 0; i <= V_pf_hashmask; i++) {
struct pf_idhash *ih = &V_pf_idhash[i];
@@ -566,13 +575,14 @@ pf_flush_task(void *c, int pending)
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry) {
sk = s->key[PF_SK_WIRE];
SLIST_FOREACH(pffe, &queue, next)
if (sk->af == pffe->af && (pffe->rule == NULL ||
pffe->rule == s->rule.ptr) &&
((pffe->dir == PF_OUT &&
PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) ||
(pffe->dir == PF_IN &&
PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) {
SLIST_FOREACH(pfoe, &queue, next)
if (sk->af == pfoe->af &&
((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
pfoe->rule == s->rule.ptr) &&
((pfoe->dir == PF_OUT &&
PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
(pfoe->dir == PF_IN &&
PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
s->timeout = PFTM_PURGE;
s->src.state = s->dst.state = TCPS_CLOSED;
killed++;
@@ -580,8 +590,8 @@ pf_flush_task(void *c, int pending)
}
PF_HASHROW_UNLOCK(ih);
}
SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1)
free(pffe, M_PFTEMP);
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
free(pfoe, M_PFTEMP);
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("%s: %u states killed", __func__, killed);
}
@@ -742,12 +752,13 @@ pf_initialize()
sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL,
UMA_ALIGN_PTR, 0);
/* Send & flush queues. */
/* Send & overload+flush queues. */
STAILQ_INIT(&V_pf_sendqueue);
SLIST_INIT(&V_pf_flushqueue);
TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue);
SLIST_INIT(&V_pf_overloadqueue);
TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF);
mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
MTX_DEF);
/* Unlinked, but may be referenced rules. */
TAILQ_INIT(&V_pf_unlinked_rules);
@@ -788,7 +799,7 @@ pf_cleanup()
}
mtx_destroy(&pf_sendqueue_mtx);
mtx_destroy(&pf_flushqueue_mtx);
mtx_destroy(&pf_overloadqueue_mtx);
mtx_destroy(&pf_unlnkdrules_mtx);
uma_zdestroy(V_pf_mtag_z);
@@ -1578,6 +1589,19 @@ pf_purge_unlinked_rules()
struct pf_rulequeue tmpq;
struct pf_rule *r, *r1;
/*
* If we have overloading task pending, then we'd
* better skip purging this time. There is a tiny
* probability that overloading task references
* an already unlinked rule.
*/
PF_OVERLOADQ_LOCK();
if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
PF_OVERLOADQ_UNLOCK();
return;
}
PF_OVERLOADQ_UNLOCK();
/*
* Do naive mark-and-sweep garbage collecting of old rules.
* Reference flag is raised by pf_purge_expired_states()


@@ -742,6 +742,8 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
struct radix_node_head *head = NULL;
struct pfr_kentry *ke;
PF_RULES_ASSERT();
bzero(&sa, sizeof(sa));
if (ad->pfra_af == AF_INET) {
FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
@@ -929,6 +931,8 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
struct radix_node *rn;
struct radix_node_head *head = NULL;
PF_RULES_WASSERT();
bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
if (ke->pfrke_af == AF_INET)
head = kt->pfrkt_ip4;