Once pf became not covered by a single mutex, many counters in it became
race prone. Some just gather statistics, but some are later used in different calculations. A real problem was the race provoked underflow of the states_cur counter on a rule. Once it goes below zero, it wraps to UINT32_MAX. Later this value is used in pf_state_expires() and any state created by this rule is immediately expired. Thus, make fields states_cur, states_tot and src_nodes of struct pf_rule be counter(9)s. Thanks to Dennis for providing me shell access to problematic box and his help with reproducing, debugging and investigating the problem. Thanks to: Dennis Yusupoff <dyr smartspb.net> Also reported by: dumbbell, pgj, Rambler Sponsored by: Nginx, Inc.
This commit is contained in:
parent
e7560978e3
commit
1ea1d562a3
@ -791,17 +791,17 @@ pfctl_print_rule_counters(struct pf_rule *rule, int opts)
|
||||
}
|
||||
if (opts & PF_OPT_VERBOSE) {
|
||||
printf(" [ Evaluations: %-8llu Packets: %-8llu "
|
||||
"Bytes: %-10llu States: %-6u]\n",
|
||||
"Bytes: %-10llu States: %-6lu]\n",
|
||||
(unsigned long long)rule->evaluations,
|
||||
(unsigned long long)(rule->packets[0] +
|
||||
rule->packets[1]),
|
||||
(unsigned long long)(rule->bytes[0] +
|
||||
rule->bytes[1]), rule->states_cur);
|
||||
rule->bytes[1]), (uint64_t)rule->states_cur);
|
||||
if (!(opts & PF_OPT_DEBUG))
|
||||
printf(" [ Inserted: uid %u pid %u "
|
||||
"State Creations: %-6u]\n",
|
||||
"State Creations: %-6lu]\n",
|
||||
(unsigned)rule->cuid, (unsigned)rule->cpid,
|
||||
rule->states_tot);
|
||||
(uint64_t)rule->states_tot);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/counter.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/tree.h>
|
||||
|
||||
@ -512,13 +513,9 @@ struct pf_rule {
|
||||
|
||||
int rtableid;
|
||||
u_int32_t timeout[PFTM_MAX];
|
||||
u_int32_t states_cur;
|
||||
u_int32_t states_tot;
|
||||
u_int32_t max_states;
|
||||
u_int32_t src_nodes;
|
||||
u_int32_t max_src_nodes;
|
||||
u_int32_t max_src_states;
|
||||
u_int32_t spare1; /* netgraph */
|
||||
u_int32_t max_src_conn;
|
||||
struct {
|
||||
u_int32_t limit;
|
||||
@ -532,6 +529,10 @@ struct pf_rule {
|
||||
uid_t cuid;
|
||||
pid_t cpid;
|
||||
|
||||
counter_u64_t states_cur;
|
||||
counter_u64_t states_tot;
|
||||
counter_u64_t src_nodes;
|
||||
|
||||
u_int16_t return_icmp;
|
||||
u_int16_t return_icmp6;
|
||||
u_int16_t max_mss;
|
||||
|
@ -438,7 +438,8 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
|
||||
else
|
||||
r = &V_pf_default_rule;
|
||||
|
||||
if ((r->max_states && r->states_cur >= r->max_states))
|
||||
if ((r->max_states &&
|
||||
counter_u64_fetch(r->states_cur) >= r->max_states))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
@ -516,18 +517,15 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
|
||||
st->pfsync_time = time_uptime;
|
||||
st->sync_state = PFSYNC_S_NONE;
|
||||
|
||||
/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
|
||||
r->states_cur++;
|
||||
r->states_tot++;
|
||||
|
||||
if (!(flags & PFSYNC_SI_IOCTL))
|
||||
st->state_flags |= PFSTATE_NOSYNC;
|
||||
|
||||
if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
|
||||
/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
|
||||
r->states_cur--;
|
||||
if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
|
||||
goto cleanup_state;
|
||||
}
|
||||
|
||||
/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
|
||||
counter_u64_add(r->states_cur, 1);
|
||||
counter_u64_add(r->states_tot, 1);
|
||||
|
||||
if (!(flags & PFSYNC_SI_IOCTL)) {
|
||||
st->state_flags &= ~PFSTATE_NOSYNC;
|
||||
|
@ -335,27 +335,27 @@ enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI_SOLICITED, PF_ICMP_MULTI_LINK };
|
||||
#define BOUND_IFACE(r, k) \
|
||||
((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all
|
||||
|
||||
#define STATE_INC_COUNTERS(s) \
|
||||
do { \
|
||||
s->rule.ptr->states_cur++; \
|
||||
s->rule.ptr->states_tot++; \
|
||||
if (s->anchor.ptr != NULL) { \
|
||||
s->anchor.ptr->states_cur++; \
|
||||
s->anchor.ptr->states_tot++; \
|
||||
} \
|
||||
if (s->nat_rule.ptr != NULL) { \
|
||||
s->nat_rule.ptr->states_cur++; \
|
||||
s->nat_rule.ptr->states_tot++; \
|
||||
} \
|
||||
#define STATE_INC_COUNTERS(s) \
|
||||
do { \
|
||||
counter_u64_add(s->rule.ptr->states_cur, 1); \
|
||||
counter_u64_add(s->rule.ptr->states_tot, 1); \
|
||||
if (s->anchor.ptr != NULL) { \
|
||||
counter_u64_add(s->anchor.ptr->states_cur, 1); \
|
||||
counter_u64_add(s->anchor.ptr->states_tot, 1); \
|
||||
} \
|
||||
if (s->nat_rule.ptr != NULL) { \
|
||||
counter_u64_add(s->nat_rule.ptr->states_cur, 1);\
|
||||
counter_u64_add(s->nat_rule.ptr->states_tot, 1);\
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define STATE_DEC_COUNTERS(s) \
|
||||
do { \
|
||||
if (s->nat_rule.ptr != NULL) \
|
||||
s->nat_rule.ptr->states_cur--; \
|
||||
if (s->anchor.ptr != NULL) \
|
||||
s->anchor.ptr->states_cur--; \
|
||||
s->rule.ptr->states_cur--; \
|
||||
#define STATE_DEC_COUNTERS(s) \
|
||||
do { \
|
||||
if (s->nat_rule.ptr != NULL) \
|
||||
counter_u64_add(s->nat_rule.ptr->states_cur, -1);\
|
||||
if (s->anchor.ptr != NULL) \
|
||||
counter_u64_add(s->anchor.ptr->states_cur, -1); \
|
||||
counter_u64_add(s->rule.ptr->states_cur, -1); \
|
||||
} while (0)
|
||||
|
||||
static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
|
||||
@ -647,7 +647,7 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
|
||||
PF_HASHROW_ASSERT(sh);
|
||||
|
||||
if (!rule->max_src_nodes ||
|
||||
rule->src_nodes < rule->max_src_nodes)
|
||||
counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
|
||||
(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
|
||||
else
|
||||
V_pf_status.lcounters[LCNT_SRCNODES]++;
|
||||
@ -667,7 +667,7 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
|
||||
(*sn)->creation = time_uptime;
|
||||
(*sn)->ruletype = rule->action;
|
||||
if ((*sn)->rule.ptr != NULL)
|
||||
(*sn)->rule.ptr->src_nodes++;
|
||||
counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
|
||||
PF_HASHROW_UNLOCK(sh);
|
||||
V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
|
||||
V_pf_status.src_nodes++;
|
||||
@ -692,7 +692,7 @@ pf_unlink_src_node_locked(struct pf_src_node *src)
|
||||
#endif
|
||||
LIST_REMOVE(src, entry);
|
||||
if (src->rule.ptr)
|
||||
src->rule.ptr->src_nodes--;
|
||||
counter_u64_add(src->rule.ptr->src_nodes, -1);
|
||||
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
||||
V_pf_status.src_nodes--;
|
||||
}
|
||||
@ -1478,7 +1478,7 @@ pf_state_expires(const struct pf_state *state)
|
||||
start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
|
||||
if (start) {
|
||||
end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
|
||||
states = state->rule.ptr->states_cur; /* XXXGL */
|
||||
states = counter_u64_fetch(state->rule.ptr->states_cur);
|
||||
} else {
|
||||
start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
|
||||
end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
|
||||
@ -1587,11 +1587,7 @@ pf_unlink_state(struct pf_state *s, u_int flags)
|
||||
if (pfsync_delete_state_ptr != NULL)
|
||||
pfsync_delete_state_ptr(s);
|
||||
|
||||
--s->rule.ptr->states_cur;
|
||||
if (s->nat_rule.ptr != NULL)
|
||||
--s->nat_rule.ptr->states_cur;
|
||||
if (s->anchor.ptr != NULL)
|
||||
--s->anchor.ptr->states_cur;
|
||||
STATE_DEC_COUNTERS(s);
|
||||
|
||||
s->timeout = PFTM_UNLINKED;
|
||||
|
||||
@ -3606,7 +3602,8 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
|
||||
u_short reason;
|
||||
|
||||
/* check maximums */
|
||||
if (r->max_states && (r->states_cur >= r->max_states)) {
|
||||
if (r->max_states &&
|
||||
(counter_u64_fetch(r->states_cur) >= r->max_states)) {
|
||||
V_pf_status.lcounters[LCNT_STATES]++;
|
||||
REASON_SET(&reason, PFRES_MAXSTATES);
|
||||
return (PF_DROP);
|
||||
|
@ -229,6 +229,10 @@ pfattach(void)
|
||||
V_pf_default_rule.nr = -1;
|
||||
V_pf_default_rule.rtableid = -1;
|
||||
|
||||
V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
|
||||
V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
|
||||
V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);
|
||||
|
||||
/* initialize default timeouts */
|
||||
my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
|
||||
my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
|
||||
@ -398,6 +402,9 @@ pf_free_rule(struct pf_rule *rule)
|
||||
pfi_kif_unref(rule->kif);
|
||||
pf_anchor_remove(rule);
|
||||
pf_empty_pool(&rule->rpool.list);
|
||||
counter_u64_free(rule->states_cur);
|
||||
counter_u64_free(rule->states_tot);
|
||||
counter_u64_free(rule->src_nodes);
|
||||
free(rule, M_PFRULE);
|
||||
}
|
||||
|
||||
@ -1149,6 +1156,9 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
|
||||
bcopy(&pr->rule, rule, sizeof(struct pf_rule));
|
||||
if (rule->ifname[0])
|
||||
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
|
||||
rule->states_cur = counter_u64_alloc(M_WAITOK);
|
||||
rule->states_tot = counter_u64_alloc(M_WAITOK);
|
||||
rule->src_nodes = counter_u64_alloc(M_WAITOK);
|
||||
rule->cuid = td->td_ucred->cr_ruid;
|
||||
rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
|
||||
TAILQ_INIT(&rule->rpool.list);
|
||||
@ -1265,6 +1275,9 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
|
||||
#undef ERROUT
|
||||
DIOCADDRULE_error:
|
||||
PF_RULES_WUNLOCK();
|
||||
counter_u64_free(rule->states_cur);
|
||||
counter_u64_free(rule->states_tot);
|
||||
counter_u64_free(rule->src_nodes);
|
||||
free(rule, M_PFRULE);
|
||||
if (kif)
|
||||
free(kif, PFI_MTYPE);
|
||||
@ -1336,6 +1349,16 @@ DIOCADDRULE_error:
|
||||
break;
|
||||
}
|
||||
bcopy(rule, &pr->rule, sizeof(struct pf_rule));
|
||||
/*
|
||||
* XXXGL: this is what happens when internal kernel
|
||||
* structures are used as ioctl API structures.
|
||||
*/
|
||||
pr->rule.states_cur =
|
||||
(counter_u64_t )counter_u64_fetch(rule->states_cur);
|
||||
pr->rule.states_tot =
|
||||
(counter_u64_t )counter_u64_fetch(rule->states_tot);
|
||||
pr->rule.src_nodes =
|
||||
(counter_u64_t )counter_u64_fetch(rule->src_nodes);
|
||||
if (pf_anchor_copyout(ruleset, rule, pr)) {
|
||||
PF_RULES_WUNLOCK();
|
||||
error = EBUSY;
|
||||
@ -1354,7 +1377,7 @@ DIOCADDRULE_error:
|
||||
rule->evaluations = 0;
|
||||
rule->packets[0] = rule->packets[1] = 0;
|
||||
rule->bytes[0] = rule->bytes[1] = 0;
|
||||
rule->states_tot = 0;
|
||||
counter_u64_zero(rule->states_tot);
|
||||
}
|
||||
PF_RULES_WUNLOCK();
|
||||
break;
|
||||
@ -1394,15 +1417,14 @@ DIOCADDRULE_error:
|
||||
#endif /* INET6 */
|
||||
newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK);
|
||||
bcopy(&pcr->rule, newrule, sizeof(struct pf_rule));
|
||||
if (newrule->ifname[0])
|
||||
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
|
||||
newrule->states_cur = counter_u64_alloc(M_WAITOK);
|
||||
newrule->states_tot = counter_u64_alloc(M_WAITOK);
|
||||
newrule->src_nodes = counter_u64_alloc(M_WAITOK);
|
||||
newrule->cuid = td->td_ucred->cr_ruid;
|
||||
newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
|
||||
TAILQ_INIT(&newrule->rpool.list);
|
||||
/* Initialize refcounting. */
|
||||
newrule->states_cur = 0;
|
||||
newrule->entries.tqe_prev = NULL;
|
||||
|
||||
if (newrule->ifname[0])
|
||||
kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
|
||||
}
|
||||
|
||||
#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; }
|
||||
@ -1570,8 +1592,12 @@ DIOCADDRULE_error:
|
||||
#undef ERROUT
|
||||
DIOCCHANGERULE_error:
|
||||
PF_RULES_WUNLOCK();
|
||||
if (newrule != NULL)
|
||||
if (newrule != NULL) {
|
||||
counter_u64_free(newrule->states_cur);
|
||||
counter_u64_free(newrule->states_tot);
|
||||
counter_u64_free(newrule->src_nodes);
|
||||
free(newrule, M_PFRULE);
|
||||
}
|
||||
if (kif != NULL)
|
||||
free(kif, PFI_MTYPE);
|
||||
break;
|
||||
@ -3427,6 +3453,11 @@ shutdown_pf(void)
|
||||
char nn = '\0';
|
||||
|
||||
V_pf_status.running = 0;
|
||||
|
||||
counter_u64_free(V_pf_default_rule.states_cur);
|
||||
counter_u64_free(V_pf_default_rule.states_tot);
|
||||
counter_u64_free(V_pf_default_rule.src_nodes);
|
||||
|
||||
do {
|
||||
if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
|
||||
!= 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user