pf: depessimize table handling
Creating tables and zeroing their counters induces excessive IPIs (14 per table), which in turn kills single- and multi-threaded performance. Work around the problem by extending per-CPU counters with a general counter populated on "zeroing" requests -- it stores the currently found sum. Then requests to report the current value are the sum of per-CPU counters minus the saved value. Sample timings when loading a config with 100k tables on a 104-way box: stock: pfctl -f tables100000.conf 0.39s user 69.37s system 99% cpu 1:09.76 total pfctl -f tables100000.conf 0.40s user 68.14s system 99% cpu 1:08.54 total patched: pfctl -f tables100000.conf 0.35s user 6.41s system 99% cpu 6.771 total pfctl -f tables100000.conf 0.48s user 6.47s system 99% cpu 6.949 total Reviewed by: kp (previous version) Sponsored by: Rubicon Communications, LLC ("Netgate")
This commit is contained in:
parent
c5d6dd80b5
commit
f92c21a28c
@ -817,16 +817,70 @@ struct pfr_tstats {
|
||||
int pfrts_refcnt[PFR_REFCNT_MAX];
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
/*
 * Table stat counter: a per-CPU counter plus a saved baseline.
 * Instead of zeroing the per-CPU counter (which triggers IPIs),
 * a "zero" request records the current sum in pkc_zero; fetches
 * report the per-CPU sum minus that baseline.
 */
struct pfr_kstate_counter {
	counter_u64_t	pkc_pcpu;	/* live per-CPU counter */
	u_int64_t	pkc_zero;	/* sum captured at last zero request */
};
|
||||
|
||||
static inline int
|
||||
pfr_kstate_counter_init(struct pfr_kstate_counter *pfrc, int flags)
|
||||
{
|
||||
|
||||
pfrc->pkc_zero = 0;
|
||||
pfrc->pkc_pcpu = counter_u64_alloc(flags);
|
||||
if (pfrc->pkc_pcpu == NULL)
|
||||
return (ENOMEM);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Release the per-CPU counter backing a table stat counter.
 * Must only be called on a counter that was successfully initialized.
 */
static inline void
pfr_kstate_counter_deinit(struct pfr_kstate_counter *pfrc)
{

	counter_u64_free(pfrc->pkc_pcpu);
}
|
||||
|
||||
static inline u_int64_t
|
||||
pfr_kstate_counter_fetch(struct pfr_kstate_counter *pfrc)
|
||||
{
|
||||
u_int64_t c;
|
||||
|
||||
c = counter_u64_fetch(pfrc->pkc_pcpu);
|
||||
c -= pfrc->pkc_zero;
|
||||
return (c);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pfr_kstate_counter_zero(struct pfr_kstate_counter *pfrc)
|
||||
{
|
||||
u_int64_t c;
|
||||
|
||||
c = counter_u64_fetch(pfrc->pkc_pcpu);
|
||||
pfrc->pkc_zero = c;
|
||||
}
|
||||
|
||||
/*
 * Add 'n' to a table stat counter via its per-CPU counter.
 * Cheap hot-path operation; no synchronization beyond what
 * counter_u64_add() itself provides.
 */
static inline void
pfr_kstate_counter_add(struct pfr_kstate_counter *pfrc, int64_t n)
{

	counter_u64_add(pfrc->pkc_pcpu, n);
}
|
||||
|
||||
/*
 * Kernel-side table statistics.  Packet/byte/match counters use
 * struct pfr_kstate_counter so that clearing them does not issue
 * per-CPU zeroing IPIs.
 * NOTE(review): reconstructed post-patch state of this diff hunk;
 * the pre-patch counter_u64_t members were removed by the change.
 */
struct pfr_ktstats {
	struct pfr_table	 pfrts_t;
	struct pfr_kstate_counter	 pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
	struct pfr_kstate_counter	 pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
	struct pfr_kstate_counter	 pfrkts_match;
	struct pfr_kstate_counter	 pfrkts_nomatch;
	long			 pfrkts_tzero;	/* time of last clear */
	int			 pfrkts_cnt;
	int			 pfrkts_refcnt[PFR_REFCNT_MAX];
};
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#define pfrts_name pfrts_t.pfrt_name
|
||||
#define pfrts_flags pfrts_t.pfrt_flags
|
||||
|
||||
|
@ -1326,15 +1326,15 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
|
||||
for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
|
||||
for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
|
||||
tbl->pfrts_packets[pfr_dir][pfr_op] =
|
||||
counter_u64_fetch(
|
||||
p->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_fetch(
|
||||
&p->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
tbl->pfrts_bytes[pfr_dir][pfr_op] =
|
||||
counter_u64_fetch(
|
||||
p->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_fetch(
|
||||
&p->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
}
|
||||
}
|
||||
tbl->pfrts_match = counter_u64_fetch(p->pfrkt_match);
|
||||
tbl->pfrts_nomatch = counter_u64_fetch(p->pfrkt_nomatch);
|
||||
tbl->pfrts_match = pfr_kstate_counter_fetch(&p->pfrkt_match);
|
||||
tbl->pfrts_nomatch = pfr_kstate_counter_fetch(&p->pfrkt_nomatch);
|
||||
tbl->pfrts_tzero = p->pfrkt_tzero;
|
||||
tbl->pfrts_cnt = p->pfrkt_cnt;
|
||||
for (pfr_op = 0; pfr_op < PFR_REFCNT_MAX; pfr_op++)
|
||||
@ -1870,12 +1870,12 @@ pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
|
||||
}
|
||||
for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
|
||||
for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
|
||||
counter_u64_zero(kt->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
counter_u64_zero(kt->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_zero(&kt->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_zero(&kt->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
}
|
||||
}
|
||||
counter_u64_zero(kt->pfrkt_match);
|
||||
counter_u64_zero(kt->pfrkt_nomatch);
|
||||
pfr_kstate_counter_zero(&kt->pfrkt_match);
|
||||
pfr_kstate_counter_zero(&kt->pfrkt_nomatch);
|
||||
kt->pfrkt_tzero = tzero;
|
||||
}
|
||||
|
||||
@ -1905,28 +1905,24 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
|
||||
|
||||
for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
|
||||
for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
|
||||
kt->pfrkt_packets[pfr_dir][pfr_op] =
|
||||
counter_u64_alloc(M_NOWAIT);
|
||||
if (! kt->pfrkt_packets[pfr_dir][pfr_op]) {
|
||||
if (pfr_kstate_counter_init(
|
||||
&kt->pfrkt_packets[pfr_dir][pfr_op], M_NOWAIT) != 0) {
|
||||
pfr_destroy_ktable(kt, 0);
|
||||
return (NULL);
|
||||
}
|
||||
kt->pfrkt_bytes[pfr_dir][pfr_op] =
|
||||
counter_u64_alloc(M_NOWAIT);
|
||||
if (! kt->pfrkt_bytes[pfr_dir][pfr_op]) {
|
||||
if (pfr_kstate_counter_init(
|
||||
&kt->pfrkt_bytes[pfr_dir][pfr_op], M_NOWAIT) != 0) {
|
||||
pfr_destroy_ktable(kt, 0);
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
kt->pfrkt_match = counter_u64_alloc(M_NOWAIT);
|
||||
if (! kt->pfrkt_match) {
|
||||
if (pfr_kstate_counter_init(&kt->pfrkt_match, M_NOWAIT) != 0) {
|
||||
pfr_destroy_ktable(kt, 0);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
kt->pfrkt_nomatch = counter_u64_alloc(M_NOWAIT);
|
||||
if (! kt->pfrkt_nomatch) {
|
||||
if (pfr_kstate_counter_init(&kt->pfrkt_nomatch, M_NOWAIT) != 0) {
|
||||
pfr_destroy_ktable(kt, 0);
|
||||
return (NULL);
|
||||
}
|
||||
@ -1977,12 +1973,12 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
|
||||
}
|
||||
for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
|
||||
for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
|
||||
counter_u64_free(kt->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
counter_u64_free(kt->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_deinit(&kt->pfrkt_packets[pfr_dir][pfr_op]);
|
||||
pfr_kstate_counter_deinit(&kt->pfrkt_bytes[pfr_dir][pfr_op]);
|
||||
}
|
||||
}
|
||||
counter_u64_free(kt->pfrkt_match);
|
||||
counter_u64_free(kt->pfrkt_nomatch);
|
||||
pfr_kstate_counter_deinit(&kt->pfrkt_match);
|
||||
pfr_kstate_counter_deinit(&kt->pfrkt_nomatch);
|
||||
|
||||
free(kt, M_PFTABLE);
|
||||
}
|
||||
@ -2052,9 +2048,9 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
|
||||
}
|
||||
match = (ke && !ke->pfrke_not);
|
||||
if (match)
|
||||
counter_u64_add(kt->pfrkt_match, 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_match, 1);
|
||||
else
|
||||
counter_u64_add(kt->pfrkt_nomatch, 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1);
|
||||
return (match);
|
||||
}
|
||||
|
||||
@ -2109,8 +2105,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
|
||||
("pfr_update_stats: assertion failed.\n"));
|
||||
op_pass = PFR_OP_XPASS;
|
||||
}
|
||||
counter_u64_add(kt->pfrkt_packets[dir_out][op_pass], 1);
|
||||
counter_u64_add(kt->pfrkt_bytes[dir_out][op_pass], len);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_packets[dir_out][op_pass], 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_bytes[dir_out][op_pass], len);
|
||||
if (ke != NULL && op_pass != PFR_OP_XPASS &&
|
||||
(kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
|
||||
counter_u64_add(pfr_kentry_counter(&ke->pfrke_counters,
|
||||
@ -2206,7 +2202,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
|
||||
_next_block:
|
||||
ke = pfr_kentry_byidx(kt, idx, af);
|
||||
if (ke == NULL) {
|
||||
counter_u64_add(kt->pfrkt_nomatch, 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1);
|
||||
return (1);
|
||||
}
|
||||
pfr_prepare_network(&umask, af, ke->pfrke_net);
|
||||
@ -2231,7 +2227,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
|
||||
/* this is a single IP address - no possible nested block */
|
||||
PF_ACPY(counter, addr, af);
|
||||
*pidx = idx;
|
||||
counter_u64_add(kt->pfrkt_match, 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_match, 1);
|
||||
return (0);
|
||||
}
|
||||
for (;;) {
|
||||
@ -2251,7 +2247,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
|
||||
/* lookup return the same block - perfect */
|
||||
PF_ACPY(counter, addr, af);
|
||||
*pidx = idx;
|
||||
counter_u64_add(kt->pfrkt_match, 1);
|
||||
pfr_kstate_counter_add(&kt->pfrkt_match, 1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user