From b6ee846e04c662ad34ce8c0c203dd3d774766dfd Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 2 Aug 2014 17:18:47 +0000 Subject: [PATCH] * Fix case when returning more than 4096 bytes of data * Use different approach to ensure algo has enough space to store N elements: - explicitly ask algo (under UH_WLOCK) before/after insertion. This (along with existing reallocation callbacks) really guarantees us that it is safe to insert N elements at once while holding UH_WLOCK+WLOCK. - remove old aflags/flags approach --- sys/netpfil/ipfw/ip_fw_private.h | 1 + sys/netpfil/ipfw/ip_fw_sockopt.c | 17 +- sys/netpfil/ipfw/ip_fw_table.c | 215 ++++++++++++++---------- sys/netpfil/ipfw/ip_fw_table.h | 11 +- sys/netpfil/ipfw/ip_fw_table_algo.c | 243 +++++++++++++++++++--------- 5 files changed, 321 insertions(+), 166 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index 0435eacfe4a9..641b2041f0de 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -305,6 +305,7 @@ struct sockopt_data { size_t kavail; /* number of bytes available */ size_t ktotal; /* total bytes pushed */ struct sockopt *sopt; /* socket data */ + caddr_t sopt_val; /* sopt user buffer */ size_t valsize; /* original data size */ }; diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index 283f50d1d43e..bbab77d70c92 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -1807,6 +1807,7 @@ ipfw_ctl3(struct sockopt *sopt) } sdata.sopt = sopt; + sdata.sopt_val = sopt->sopt_val; sdata.valsize = valsize; /* @@ -1906,6 +1907,9 @@ ipfw_ctl3(struct sockopt *sopt) else ipfw_flush_sopt_data(&sdata); + /* Restore original pointer and set number of bytes written */ + sopt->sopt_val = sdata.sopt_val; + sopt->sopt_valsize = sdata.ktotal; if (sdata.kbuf != xbuf) free(sdata.kbuf, M_TEMP); @@ -2113,8 +2117,8 @@ ipfw_ctl(struct sockopt *sopt) ti.type = IPFW_TABLE_CIDR; error = 
(opt == IP_FW_TABLE_ADD) ? - add_table_entry(chain, &ti, &tei) : - del_table_entry(chain, &ti, &tei); + add_table_entry(chain, &ti, &tei, 1) : + del_table_entry(chain, &ti, &tei, 1); } break; @@ -2239,12 +2243,13 @@ static int ipfw_flush_sopt_data(struct sockopt_data *sd) { int error; + size_t sz; - if (sd->koff == 0) + if ((sz = sd->koff) == 0) return (0); if (sd->sopt->sopt_dir == SOPT_GET) { - error = sooptcopyout(sd->sopt, sd->kbuf, sd->koff); + error = sooptcopyout(sd->sopt, sd->kbuf, sz); if (error != 0) return (error); } @@ -2257,6 +2262,10 @@ ipfw_flush_sopt_data(struct sockopt_data *sd) else sd->kavail = sd->valsize - sd->ktotal; + /* Update sopt buffer */ + sd->sopt->sopt_valsize = sd->kavail; + sd->sopt->sopt_val = sd->sopt_val + sd->ktotal; + return (0); } diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index ad48dc9ef3b4..97b9a1052fc7 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -81,7 +81,6 @@ struct table_config { uint8_t spare; uint32_t count; /* Number of records */ uint32_t limit; /* Max number of records */ - uint64_t flags; /* state flags */ char tablename[64]; /* table name */ struct table_algo *ta; /* Callbacks for given algo */ void *astate; /* algorithm state */ @@ -121,8 +120,8 @@ static int ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3 static int ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); -static int modify_table(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *ta_buf, uint64_t pflags); +static int check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count); static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); static struct table_algo *find_table_algo(struct tables_config *tableconf, @@ -132,10 +131,12 @@ static struct table_algo *find_table_algo(struct tables_config *tableconf, #define 
CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) #define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) +#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ + int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei) + struct tentry_info *tei, uint32_t count) { struct table_config *tc; struct table_algo *ta; @@ -143,9 +144,8 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t kidx; int error; uint32_t num; - uint64_t aflags; - ipfw_xtable_info xi; - char ta_buf[128]; + ipfw_xtable_info *xi; + char ta_buf[TA_BUF_SZ]; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); @@ -171,7 +171,6 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, /* Reference and unlock */ tc->no.refcnt++; ta = tc->ta; - aflags = tc->flags; } IPFW_UH_WUNLOCK(ch); @@ -180,10 +179,11 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); - memset(&xi, 0, sizeof(xi)); - xi.vtype = IPFW_VTYPE_U32; + xi = malloc(sizeof(ipfw_xtable_info), M_TEMP, M_WAITOK|M_ZERO); + xi->vtype = IPFW_VTYPE_U32; - error = create_table_internal(ch, ti, NULL, &xi); + error = create_table_internal(ch, ti, NULL, xi); + free(xi, M_TEMP); if (error != 0) return (error); @@ -203,22 +203,10 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, /* Reference and unlock */ tc->no.refcnt++; ta = tc->ta; - aflags = tc->flags; IPFW_UH_WUNLOCK(ch); } - if (aflags != 0) { - - /* - * Previous add/delete call returned non-zero state. - * Run appropriate handler. - */ - error = modify_table(ch, tc, ta, &ta_buf, aflags); - if (error != 0) - return (error); - } - /* Prepare record (allocate memory) */ memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_add(ch, tei, &ta_buf); @@ -227,17 +215,28 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, IPFW_UH_WLOCK(ch); + /* + * Ensure we are able to add all entries without additional + * memory allocations. 
May release/reacquire UH_WLOCK. + */ + kidx = tc->no.kidx; + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), count); + if (error != 0) { + IPFW_UH_WUNLOCK(ch); + ta->flush_entry(ch, tei, &ta_buf); + return (error); + } + ni = CHAIN_TO_NI(ch); /* Drop reference we've used in first search */ tc->no.refcnt--; - /* Update aflags since it can be changed after previous read */ - aflags = tc->flags; /* Check limit before adding */ if (tc->limit != 0 && tc->count == tc->limit) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { IPFW_UH_WUNLOCK(ch); + ta->flush_entry(ch, tei, &ta_buf); return (EFBIG); } @@ -256,15 +255,15 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, num = 0; IPFW_WLOCK(ch); - error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, - &aflags, &num); + error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, &num); IPFW_WUNLOCK(ch); /* Update number of records. */ - if (error == 0) + if (error == 0) { tc->count += num; - - tc->flags = aflags; + /* Permit post-add algorithm grow/rehash. */ + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + } IPFW_UH_WUNLOCK(ch); @@ -276,7 +275,7 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei) + struct tentry_info *tei, uint32_t count) { struct table_config *tc; struct table_algo *ta; @@ -284,8 +283,7 @@ del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t kidx; int error; uint32_t num; - uint64_t aflags; - char ta_buf[128]; + char ta_buf[TA_BUF_SZ]; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); @@ -299,33 +297,23 @@ del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, return (EINVAL); } - aflags = tc->flags; ta = tc->ta; - if (aflags != 0) { - - /* - * Give the chance to algo to shrink its state. - */ - tc->no.refcnt++; + /* + * Give a chance for algorithm to shrink. + * May release/reacquire UH_WLOCK. 
+ */ + kidx = tc->no.kidx; + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + if (error != 0) { IPFW_UH_WUNLOCK(ch); - memset(&ta_buf, 0, sizeof(ta_buf)); - - error = modify_table(ch, tc, ta, &ta_buf, aflags); - - IPFW_UH_WLOCK(ch); - tc->no.refcnt--; - aflags = tc->flags; - - if (error != 0) { - IPFW_UH_WUNLOCK(ch); - return (error); - } + ta->flush_entry(ch, tei, &ta_buf); + return (error); } /* * We assume ta_buf size is enough for storing - * prepare_del() key, so we're running under UH_LOCK here. + * prepare_del() key, so we're running under UH_WLOCK here. */ memset(&ta_buf, 0, sizeof(ta_buf)); if ((error = ta->prepare_del(ch, tei, &ta_buf)) != 0) { @@ -337,13 +325,14 @@ del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, num = 0; IPFW_WLOCK(ch); - error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, - &aflags, &num); + error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, &num); IPFW_WUNLOCK(ch); - if (error == 0) + if (error == 0) { tc->count -= num; - tc->flags = aflags; + /* Run post-del hook to permit shrinking */ + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + } IPFW_UH_WUNLOCK(ch); @@ -353,49 +342,88 @@ del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, } /* - * Runs callbacks to modify algo state (typically, table resize). + * Ensure that table @tc has enough space to add @count entries without + * need for reallocation. * * Callbacks order: + * 0) has_space() (UH_WLOCK) - checks if @count items can be added w/o resize. + * * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage * 3) modify (UH_WLOCK + WLOCK) - switch pointers - * 4) flush_modify (no locks) - free state, if needed + * 4) flush_modify (UH_WLOCK) - free state, if needed + * + * Returns 0 on success. 
*/ static int -modify_table(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *ta_buf, uint64_t pflags) +check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count) { - struct table_info *ti; + struct table_algo *ta; + uint64_t pflags; + char ta_buf[TA_BUF_SZ]; int error; - error = ta->prepare_mod(ta_buf, &pflags); - if (error != 0) - return (error); + IPFW_UH_WLOCK_ASSERT(ch); - IPFW_UH_WLOCK(ch); - ti = KIDX_TO_TI(ch, tc->no.kidx); - - error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); + error = 0; + ta = tc->ta; + /* Acquire reference not to loose @tc between locks/unlocks */ + tc->no.refcnt++; /* - * prepare_mofify may return zero in @pflags to - * indicate that modifications are not unnesessary. + * TODO: think about avoiding race between large add/large delete + * operation on algorithm which implements shrinking along with + * growing. */ + while (true) { + pflags = 0; + if (ta->has_space(tc->astate, ti, count, &pflags) != 0) { + tc->no.refcnt--; + return (0); + } - if (error == 0 && pflags != 0) { - /* Do actual modification */ - IPFW_WLOCK(ch); - ta->modify(tc->astate, ti, ta_buf, pflags); - IPFW_WUNLOCK(ch); + /* We have to shrink/grow table */ + IPFW_UH_WUNLOCK(ch); + memset(&ta_buf, 0, sizeof(ta_buf)); + + if ((error = ta->prepare_mod(ta_buf, &pflags)) != 0) { + IPFW_UH_WLOCK(ch); + break; + } + + IPFW_UH_WLOCK(ch); + + /* Check if we still need to alter table */ + ti = KIDX_TO_TI(ch, tc->no.kidx); + if (ta->has_space(tc->astate, ti, count, &pflags) != 0) { + + /* + * Other threads has already performed resize. 
+ * Flush our state and return. + */ + ta->flush_mod(ta_buf); + break; + } + + error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); + if (error == 0) { + /* Do actual modification */ + IPFW_WLOCK(ch); + ta->modify(tc->astate, ti, ta_buf, pflags); + IPFW_WUNLOCK(ch); + } + + /* Anyway, flush data and retry */ + ta->flush_mod(ta_buf); } - IPFW_UH_WUNLOCK(ch); - - ta->flush_mod(ta_buf); - + tc->no.refcnt--; return (error); } + + int ipfw_manage_table_ent(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) @@ -463,8 +491,8 @@ ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, ti.type = xent->type; error = (op3->opcode == IP_FW_TABLE_XADD) ? - add_table_entry(ch, &ti, &tei) : - del_table_entry(ch, &ti, &tei); + add_table_entry(ch, &ti, &tei, 1) : + del_table_entry(ch, &ti, &tei, 1); return (error); } @@ -538,8 +566,8 @@ ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, ti.uidx = tent->idx; error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? - add_table_entry(ch, &ti, &tei) : - del_table_entry(ch, &ti, &tei); + add_table_entry(ch, &ti, &tei, 1) : + del_table_entry(ch, &ti, &tei, 1); return (error); } @@ -1614,16 +1642,28 @@ find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) return (tcfg->def_algo[ti->type]); } +/* + * Register new table algo @ta. + * Stores algo id inside @idx. + * + * Returns 0 on success. 
+ */ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx) { struct tables_config *tcfg; struct table_algo *ta_new; + size_t sz; if (size > sizeof(struct table_algo)) return (EINVAL); + /* Check for the required on-stack size for add/del */ + sz = roundup2(ta->ta_buf_size, sizeof(void *)); + if (sz > TA_BUF_SZ) + return (EINVAL); + KASSERT(ta->type >= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); @@ -1646,6 +1686,9 @@ ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, return (0); } +/* + * Unregisters table algo using @idx as id. + */ void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) { @@ -1654,8 +1697,8 @@ ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) tcfg = CHAIN_TO_TCFG(ch); - KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of rage 1..%d", idx, - tcfg->algo_count)); + KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", + idx, tcfg->algo_count)); ta = tcfg->algo[idx]; KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h index 3d449cf4f2c4..c98365f774ce 100644 --- a/sys/netpfil/ipfw/ip_fw_table.h +++ b/sys/netpfil/ipfw/ip_fw_table.h @@ -71,12 +71,14 @@ typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); typedef int (ta_add)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint64_t *pflags, uint32_t *pnum); + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); typedef int (ta_del)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint64_t *pflags, uint32_t *pnum); + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); +typedef int 
(ta_has_space)(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); @@ -113,6 +115,7 @@ struct table_algo { ta_del *del; ta_flush_entry *flush_entry; ta_find_tentry *find_tentry; + ta_has_space *has_space; ta_prepare_mod *prepare_mod; ta_fill_mod *fill_mod; ta_modify *modify; @@ -151,9 +154,9 @@ int ipfw_flush_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, int ipfw_list_table_algo(struct ip_fw_chain *ch, struct sockopt_data *sd); /* Exported to support legacy opcodes */ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei); + struct tentry_info *tei, uint32_t count); int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei); + struct tentry_info *tei, uint32_t count); int flush_table(struct ip_fw_chain *ch, struct tid_info *ti); int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 10484ae5baed..1e9507f60a06 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -397,7 +397,7 @@ ta_prepare_add_cidr(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_add_cidr(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct radix_node_head *rnh; struct radix_node *rn; @@ -489,7 +489,7 @@ ta_prepare_del_cidr(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_del_cidr(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct radix_node_head *rnh; struct radix_node *rn; @@ -526,6 +526,20 @@ ta_flush_cidr_entry(struct ip_fw_chain *ch, struct tentry_info *tei, free(tb->ent_ptr, 
M_IPFW_TBL); } +static int +ta_has_space_radix(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + + /* + * radix does not require additional memory allocations + * other than the nodes themselves. Adding new masks to the tree does + * but we don't have any API to call (and we don't know which + * sizes we need). + */ + return (1); +} + struct table_algo cidr_radix = { .name = "cidr:radix", .type = IPFW_TABLE_CIDR, @@ -541,6 +555,7 @@ struct table_algo cidr_radix = { .foreach = ta_foreach_radix, .dump_tentry = ta_dump_radix_tentry, .find_tentry = ta_find_radix_tentry, + .has_space = ta_has_space_radix, }; @@ -1115,7 +1130,7 @@ ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct chash_cfg *ccfg; struct chashbhead *head; @@ -1172,16 +1187,11 @@ ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, tb->ent_ptr = NULL; *pnum = 1; - /* Update counters and check if we need to grow hash */ - if (tei->subtype == AF_INET) { + /* Update counters */ + if (tei->subtype == AF_INET) ccfg->items4++; - if (ccfg->items4 > ccfg->size4 && ccfg->size4 < 65536) - *pflags = (ccfg->size4 * 2) | (1UL << 32); - } else { + else ccfg->items6++; - if (ccfg->items6 > ccfg->size6 && ccfg->size6 < 65536) - *pflags = ccfg->size6 * 2; - } } return (0); @@ -1200,7 +1210,7 @@ ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct chash_cfg *ccfg; struct chashbhead *head; @@ -1263,8 +1273,39 @@ ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, struct mod_item { void *main_ptr; size_t size; + void *main_ptr6; + size_t size6; }; +static int 
+ta_has_space_chash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct chash_cfg *cfg; + uint64_t data; + + /* + * Since we don't know exact number of IPv4/IPv6 records in @count, + * ignore non-zero @count value at all. Check current hash sizes + * and return appropriate data. + */ + + cfg = (struct chash_cfg *)ta_state; + + data = 0; + if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) + data |= (cfg->size4 * 2) << 16; + if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) + data |= cfg->size6 * 2; + + if (data != 0) { + *pflags = data; + return (0); + } + + return (1); +} + /* * Allocate new, larger chash. */ @@ -1278,13 +1319,23 @@ ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); - mi->size = *pflags & 0xFFFFFFFF; - head = malloc(sizeof(struct chashbhead) * mi->size, M_IPFW, - M_WAITOK | M_ZERO); - for (i = 0; i < mi->size; i++) - SLIST_INIT(&head[i]); + mi->size = (*pflags >> 16) & 0xFFFF; + mi->size6 = *pflags & 0xFFFF; + if (mi->size > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + mi->main_ptr = head; + } - mi->main_ptr = head; + if (mi->size6 > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size6, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size6; i++) + SLIST_INIT(&head[i]); + mi->main_ptr6 = head; + } return (0); } @@ -1301,7 +1352,6 @@ ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, return (0); } - /* * Switch old & new arrays. 
*/ @@ -1310,54 +1360,62 @@ ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; - struct chash_cfg *ccfg; + struct chash_cfg *cfg; struct chashbhead *old_head, *new_head; struct chashentry *ent, *ent_next; int af, i, mlen; uint32_t nhash; - size_t old_size; + size_t old_size, new_size; mi = (struct mod_item *)ta_buf; - ccfg = (struct chash_cfg *)ta_state; + cfg = (struct chash_cfg *)ta_state; /* Check which hash we need to grow and do we still need that */ - if ((pflags >> 32) == 1) { - old_size = ccfg->size4; + if (mi->size > 0 && cfg->size4 < mi->size) { + new_head = (struct chashbhead *)mi->main_ptr; + new_size = mi->size; + old_size = cfg->size4; old_head = ti->state; - mlen = ccfg->mask4; + mlen = cfg->mask4; af = AF_INET; - } else { - old_size = ccfg->size6; - old_head = ti->xstate; - mlen = ccfg->mask6; - af = AF_INET6; - } - if (old_size >= mi->size) - return (0); - - new_head = (struct chashbhead *)mi->main_ptr; - for (i = 0; i < old_size; i++) { - SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { - nhash = hash_ent(ent, af, mlen, mi->size); - SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } } - } - if (af == AF_INET) { ti->state = new_head; - ccfg->head4 = new_head; - ccfg->size4 = mi->size; - } else { - ti->xstate = new_head; - ccfg->head6 = new_head; - ccfg->size6 = mi->size; + cfg->head4 = new_head; + cfg->size4 = mi->size; + mi->main_ptr = old_head; } - ti->data = (ti->data & 0xFFFFFFFF00000000) | log2(ccfg->size4) << 8 | - log2(ccfg->size6); + if (mi->size6 > 0 && cfg->size6 < mi->size6) { + new_head = (struct chashbhead *)mi->main_ptr6; + new_size = mi->size6; + old_size = cfg->size6; + old_head = ti->xstate; + mlen = cfg->mask6; + af = AF_INET6; - mi->main_ptr = old_head; + for (i = 0; i < old_size; 
i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->xstate = new_head; + cfg->head6 = new_head; + cfg->size6 = mi->size6; + mi->main_ptr6 = old_head; + } + + /* Update lower 32 bits with new values */ + ti->data &= 0xFFFFFFFF00000000; + ti->data |= log2(cfg->size4) << 8 | log2(cfg->size6); return (0); } @@ -1373,6 +1431,8 @@ ta_flush_mod_chash(void *ta_buf) mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); + if (mi->main_ptr6 != NULL) + free(mi->main_ptr6, M_IPFW); } struct table_algo cidr_hash = { @@ -1390,6 +1450,7 @@ struct table_algo cidr_hash = { .dump_tentry = ta_dump_chash_tentry, .find_tentry = ta_find_chash_tentry, .print_config = ta_print_chash_config, + .has_space = ta_has_space_chash, .prepare_mod = ta_prepare_mod_chash, .fill_mod = ta_fill_mod_chash, .modify = ta_modify_chash, @@ -1678,7 +1739,7 @@ ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife, *tmp; @@ -1726,11 +1787,6 @@ ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, ipfw_iface_add_notify(icfg->ch, &ife->ic); icfg->count++; - if (icfg->count + 1 == icfg->size) { - /* Notify core we need to grow */ - *pflags = icfg->size + IFIDX_CHUNK; - } - tb->ife = NULL; *pnum = 1; @@ -1764,7 +1820,7 @@ ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, */ static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife; @@ -1883,6 +1939,22 @@ struct mod_ifidx { size_t size; }; +static int +ta_has_space_ifidx(void 
*ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct iftable_cfg *cfg; + + cfg = (struct iftable_cfg *)ta_state; + + if (cfg->count + count > cfg->size) { + *pflags = roundup2(cfg->count + count, IFIDX_CHUNK); + return (0); + } + + return (1); +} + /* * Allocate ned, larger runtime ifidx array. */ @@ -2049,6 +2121,7 @@ struct table_algo iface_idx = { .foreach = ta_foreach_ifidx, .dump_tentry = ta_dump_ifidx_tentry, .find_tentry = ta_find_ifidx_tentry, + .has_space = ta_has_space_ifidx, .prepare_mod = ta_prepare_mod_ifidx, .fill_mod = ta_fill_mod_ifidx, .modify = ta_modify_ifidx, @@ -2186,7 +2259,7 @@ ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; @@ -2219,11 +2292,6 @@ ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, KASSERT(res == 1, ("number %d already exists", tb->na.number)); cfg->used++; ti->data = cfg->used; - - if (cfg->used + 1 == cfg->size) { - /* Notify core we need to grow */ - *pflags = cfg->size + NUMARRAY_CHUNK; - } *pnum = 1; return (0); @@ -2235,7 +2303,7 @@ ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, */ static int ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; @@ -2255,7 +2323,6 @@ ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, KASSERT(res == 1, ("number %u does not exist", tb->na.number)); cfg->used--; ti->data = cfg->used; - *pnum = 1; return (0); @@ -2274,8 +2341,24 @@ ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, * Table growing callbacks. 
*/ +static int +ta_has_space_numarray(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + if (cfg->used + count > cfg->size) { + *pflags = roundup2(cfg->used + count, NUMARRAY_CHUNK); + return (0); + } + + return (1); +} + /* - * Allocate ned, larger runtime numarray array. + * Allocate new, larger runtime array. */ static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) @@ -2415,6 +2498,7 @@ struct table_algo number_array = { .foreach = ta_foreach_numarray, .dump_tentry = ta_dump_numarray_tentry, .find_tentry = ta_find_numarray_tentry, + .has_space = ta_has_space_numarray, .prepare_mod = ta_prepare_mod_numarray, .fill_mod = ta_fill_mod_numarray, .modify = ta_modify_numarray, @@ -2437,8 +2521,8 @@ struct table_algo number_array = { * * * pflags: - * [v4=1/v6=0][hsize] - * [ 32][ 32] + * [hsize4][hsize6] + * [ 16][ 16] */ struct fhashentry; @@ -2858,7 +2942,7 @@ ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; @@ -2907,8 +2991,6 @@ ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, /* Update counters and check if we need to grow hash */ cfg->items++; - if (cfg->items > cfg->size && cfg->size < 65536) - *pflags = cfg->size * 2; } return (0); @@ -2927,7 +3009,7 @@ ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, static int ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; @@ -2977,6 +3059,22 @@ ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, * Hash growing callbacks. 
*/ +static int +ta_has_space_fhash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + if (cfg->items > cfg->size && cfg->size < 65536) { + *pflags = cfg->size * 2; + return (0); + } + + return (1); +} + /* * Allocate new, larger fhash. */ @@ -3085,6 +3183,7 @@ struct table_algo flow_hash = { .foreach = ta_foreach_fhash, .dump_tentry = ta_dump_fhash_tentry, .find_tentry = ta_find_fhash_tentry, + .has_space = ta_has_space_fhash, .prepare_mod = ta_prepare_mod_fhash, .fill_mod = ta_fill_mod_fhash, .modify = ta_modify_fhash,