From 914bffb6ab13f86826761d3b39d45d9996556e35 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Thu, 31 Jul 2014 20:08:19 +0000 Subject: [PATCH] * Add new "flow" table type to support N=1..5-tuple lookups * Add "flow:hash" algorithm Kernel changes: * Add O_IP_FLOW_LOOKUP opcode to support "flow" lookups * Add IPFW_TABLE_FLOW table type * Add "struct tflow_entry" as storage for 6-tuple flows * Add "flow:hash" algorithm. Basically it is an auto-growing chained hash table. Additionally, we store the mask of fields we need to compare in each instance. * Increase ipfw_obj_tentry size by adding struct tflow_entry * Add per-algorithm stat (ifpw_ta_tinfo) to ipfw_xtable_info * Increase algoname length: 32 -> 64 (algo options passed there as string) * Assume every table type can be customized by flags, use u8 to store "tflags" field. * Simplify ipfw_find_table_entry() by providing @tentry directly to algo callback. * Fix bug in cidr:chash resize procedure. Userland changes: * add "flow table(NAME)" syntax to support n-tuple checking tables. 
* make fill_flags() a separate function to ease working with _s_x arrays * change "table info" output to reflect longer "type" fields Syntax: ipfw table fl2 create type flow:[src-ip][,proto][,src-port][,dst-ip][,dst-port] [algo flow:hash] Examples: 0:02 [2] zfscurr0# ipfw table fl2 create type flow:src-ip,proto,dst-port algo flow:hash 0:02 [2] zfscurr0# ipfw table fl2 info +++ table(fl2), set(0) +++ kindex: 0, type: flow:src-ip,proto,dst-port valtype: number, references: 0 algorithm: flow:hash items: 0, size: 280 0:02 [2] zfscurr0# ipfw table fl2 add 2a02:6b8::333,tcp,443 45000 0:02 [2] zfscurr0# ipfw table fl2 add 10.0.0.92,tcp,80 22000 0:02 [2] zfscurr0# ipfw table fl2 list +++ table(fl2), set(0) +++ 2a02:6b8::333,6,443 45000 10.0.0.92,6,80 22000 0:02 [2] zfscurr0# ipfw add 200 count tcp from me to 78.46.89.105 80 flow 'table(fl2)' 00200 count tcp from me to 78.46.89.105 dst-port 80 flow table(fl2) 0:03 [2] zfscurr0# ipfw show 00200 0 0 count tcp from me to 78.46.89.105 dst-port 80 flow table(fl2) 65535 617 59416 allow ip from any to any 0:03 [2] zfscurr0# telnet -s 10.0.0.92 78.46.89.105 80 Trying 78.46.89.105... .. 
0:04 [2] zfscurr0# ipfw show 00200 5 272 count tcp from me to 78.46.89.105 dst-port 80 flow table(fl2) 65535 682 66733 allow ip from any to any --- sbin/ipfw/ipfw2.c | 152 ++++-- sbin/ipfw/ipfw2.h | 5 + sbin/ipfw/tables.c | 296 +++++++++-- sys/netinet/ip_fw.h | 63 ++- sys/netpfil/ipfw/ip_fw2.c | 11 + sys/netpfil/ipfw/ip_fw_sockopt.c | 13 +- sys/netpfil/ipfw/ip_fw_table.c | 81 ++- sys/netpfil/ipfw/ip_fw_table.h | 16 +- sys/netpfil/ipfw/ip_fw_table_algo.c | 739 ++++++++++++++++++++++++++-- 9 files changed, 1201 insertions(+), 175 deletions(-) diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 8ac36f9876b6..a31451b5d41c 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -364,6 +364,7 @@ static struct _s_x rule_options[] = { { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, { "lookup", TOK_LOOKUP}, + { "flow", TOK_FLOW}, { "//", TOK_COMMENT }, { "not", TOK_NOT }, /* pseudo option */ @@ -706,6 +707,54 @@ concat_tokens(char *buf, size_t bufsize, struct _s_x *table, char *delimiter) return (sz); } +/* + * helper function to process a set of flags and set bits in the + * appropriate masks. 
+ */ +void +fill_flags(struct _s_x *flags, char *p, uint8_t *set, uint8_t *clear) +{ + char *q; /* points to the separator */ + int val; + uint8_t *which; /* mask we are working on */ + + while (p && *p) { + if (*p == '!') { + p++; + which = clear; + } else + which = set; + q = strchr(p, ','); + if (q) + *q++ = '\0'; + val = match_token(flags, p); + if (val <= 0) + errx(EX_DATAERR, "invalid flag %s", p); + *which |= (uint8_t)val; + p = q; + } +} + +void +print_flags_buffer(char *buf, size_t sz, struct _s_x *list, uint8_t set) +{ + char const *comma = ""; + int i, l; + + for (i = 0; list[i].x != 0; i++) { + if ((set & list[i].x) == 0) + continue; + + set &= ~list[i].x; + l = snprintf(buf, sz, "%s%s", comma, list[i].s); + if (l >= sz) + return; + comma = ","; + buf += l; + sz -=l; + } +} + /* * _substrcmp takes two strings and returns 1 if they do not match, * and 0 if they match exactly or the first string is a sub-string @@ -1087,6 +1136,7 @@ print_flags(char const *name, ipfw_insn *cmd, struct _s_x *list) } } + /* * Print the ip address contained in a command. 
*/ @@ -1795,6 +1845,18 @@ show_static_rule(struct cmdline_opts *co, struct format_opts *fo, break; } + case O_IP_FLOW_LOOKUP: + { + char *t; + + t = table_search_ctlv(fo->tstate, cmd->arg1); + printf(" flow table(%s", t); + if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) + printf(",%u", + ((ipfw_insn_u32 *)cmd)->d[0]); + printf(")"); + break; + } case O_IPID: if (F_LEN(cmd) == 1) printf(" ipid %u", cmd->arg1 ); @@ -2660,6 +2722,33 @@ pack_table(struct tidx *tstate, char *name, uint32_t set) return (ntlv->idx); } +static void +fill_table(ipfw_insn *cmd, char *av, uint8_t opcode, struct tidx *tstate) +{ + uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; + uint16_t uidx; + char *p; + + if ((p = strchr(av + 6, ')')) == NULL) + errx(EX_DATAERR, "forgotten parenthesis: '%s'", av); + *p = '\0'; + p = strchr(av + 6, ','); + if (p) + *p++ = '\0'; + + if ((uidx = pack_table(tstate, av + 6, 0)) == 0) + errx(EX_DATAERR, "Invalid table name: %s", av + 6); + + cmd->opcode = opcode; + cmd->arg1 = uidx; + if (p) { + cmd->len |= F_INSN_SIZE(ipfw_insn_u32); + d[0] = strtoul(p, NULL, 0); + } else + cmd->len |= F_INSN_SIZE(ipfw_insn); +} + + /* * fills the addr and mask fields in the instruction as appropriate from av. * Update length as appropriate. 
@@ -2676,8 +2765,6 @@ fill_ip(ipfw_insn_ip *cmd, char *av, int cblen, struct tidx *tstate) { int len = 0; uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; - uint16_t uidx; - char *p; cmd->o.len &= ~F_LEN_MASK; /* zero len */ @@ -2690,23 +2777,7 @@ fill_ip(ipfw_insn_ip *cmd, char *av, int cblen, struct tidx *tstate) } if (strncmp(av, "table(", 6) == 0) { - if ((p = strchr(av + 6, ')')) == NULL) - errx(EX_DATAERR, "forgotten parenthesis: '%s'", av); - *p = '\0'; - p = strchr(av + 6, ','); - if (p) - *p++ = '\0'; - - if ((uidx = pack_table(tstate, av + 6, 0)) == 0) - errx(EX_DATAERR, "Invalid table name: %s", av + 6); - - cmd->o.opcode = O_IP_DST_LOOKUP; - cmd->o.arg1 = uidx; - if (p) { - cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); - d[0] = strtoul(p, NULL, 0); - } else - cmd->o.len |= F_INSN_SIZE(ipfw_insn); + fill_table(&cmd->o, av, O_IP_DST_LOOKUP, tstate); return; } @@ -2887,35 +2958,14 @@ n2mask(struct in6_addr *mask, int n) return; } -/* - * helper function to process a set of flags and set bits in the - * appropriate masks. 
- */ static void -fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode, +fill_flags_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, struct _s_x *flags, char *p) { - uint8_t set=0, clear=0; + uint8_t set = 0, clear = 0; - while (p && *p) { - char *q; /* points to the separator */ - int val; - uint8_t *which; /* mask we are working on */ + fill_flags(flags, p, &set, &clear); - if (*p == '!') { - p++; - which = &clear; - } else - which = &set; - q = strchr(p, ','); - if (q) - *q++ = '\0'; - val = match_token(flags, p); - if (val <= 0) - errx(EX_DATAERR, "invalid flag %s", p); - *which |= (uint8_t)val; - p = q; - } cmd->opcode = opcode; cmd->len = (cmd->len & (F_NOT | F_OR)) | 1; cmd->arg1 = (set & 0xff) | ( (clear & 0xff) << 8); @@ -4087,13 +4137,13 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) case TOK_IPOPTS: NEED1("missing argument for ipoptions"); - fill_flags(cmd, O_IPOPT, f_ipopts, *av); + fill_flags_cmd(cmd, O_IPOPT, f_ipopts, *av); av++; break; case TOK_IPTOS: NEED1("missing argument for iptos"); - fill_flags(cmd, O_IPTOS, f_iptos, *av); + fill_flags_cmd(cmd, O_IPTOS, f_iptos, *av); av++; break; @@ -4171,7 +4221,7 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); - fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); + fill_flags_cmd(cmd, O_TCPOPTS, f_tcpopts, *av); av++; break; @@ -4198,7 +4248,7 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) case TOK_TCPFLAGS: NEED1("missing argument for tcpflags"); cmd->opcode = O_TCPFLAGS; - fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); + fill_flags_cmd(cmd, O_TCPFLAGS, f_tcpflags, *av); av++; break; @@ -4407,6 +4457,14 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) av++; } break; + case TOK_FLOW: + NEED1("missing table name"); + if (strncmp(*av, "table(", 6) != 0) + errx(EX_DATAERR, + "enclose table name into \"table()\""); + fill_table(cmd, *av, 
O_IP_FLOW_LOOKUP, tstate); + av++; + break; default: errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index cabc12cf8e33..1ed2ee8062ff 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -217,6 +217,7 @@ enum tokens { TOK_DEL, TOK_VALTYPE, TOK_ALGO, + TOK_FLOW, }; /* * the following macro returns an error message if we run out of @@ -253,6 +254,10 @@ int match_token(struct _s_x *table, char *string); char const *match_value(struct _s_x *p, int value); size_t concat_tokens(char *buf, size_t bufsize, struct _s_x *table, char *delimiter); +void fill_flags(struct _s_x *flags, char *p, uint8_t *set, uint8_t *clear); +void print_flags(char const *name, struct _s_x *list, uint8_t set, + uint8_t clear); +void print_flags_buffer(char *buf, size_t sz, struct _s_x *list, uint8_t set); struct _ip_fw3_opheader; int do_cmd(int optname, void *optval, uintptr_t optlen); diff --git a/sbin/ipfw/tables.c b/sbin/ipfw/tables.c index ca01c1af2d52..f94ca72c2934 100644 --- a/sbin/ipfw/tables.c +++ b/sbin/ipfw/tables.c @@ -83,6 +83,7 @@ static struct _s_x tabletypes[] = { { "cidr", IPFW_TABLE_CIDR }, { "iface", IPFW_TABLE_INTERFACE }, { "number", IPFW_TABLE_NUMBER }, + { "flow", IPFW_TABLE_FLOW }, { NULL, 0 } }; @@ -256,6 +257,59 @@ static struct _s_x tablenewcmds[] = { { NULL, 0 } }; +static struct _s_x flowtypecmds[] = { + { "src-ip", IPFW_TFFLAG_SRCIP }, + { "proto", IPFW_TFFLAG_PROTO }, + { "src-port", IPFW_TFFLAG_SRCPORT }, + { "dst-ip", IPFW_TFFLAG_DSTIP }, + { "dst-port", IPFW_TFFLAG_DSTPORT }, + { NULL, 0 } +}; + +int +table_parse_type(uint8_t ttype, char *p, uint8_t *tflags) +{ + uint8_t fset, fclear; + + /* Parse type options */ + switch(ttype) { + case IPFW_TABLE_FLOW: + fset = fclear = 0; + fill_flags(flowtypecmds, p, &fset, + &fclear); + *tflags = fset; + break; + default: + return (EX_USAGE); + } + + return (0); +} + +void +table_print_type(char *tbuf, size_t size, uint8_t type, uint8_t tflags) +{ + const char 
*tname; + int l; + + if ((tname = match_value(tabletypes, type)) == NULL) + tname = "unknown"; + + l = snprintf(tbuf, size, "%s", tname); + tbuf += l; + size -= l; + + switch(type) { + case IPFW_TABLE_FLOW: + if (tflags != 0) { + *tbuf++ = ':'; + l--; + print_flags_buffer(tbuf, size, flowtypecmds, tflags); + } + break; + } +} + /* * Creates new table * @@ -271,6 +325,7 @@ table_create(ipfw_obj_header *oh, int ac, char *av[]) ipfw_xtable_info xi; int error, tcmd, val; size_t sz; + char *p; char tbuf[128]; sz = sizeof(tbuf); @@ -288,15 +343,25 @@ table_create(ipfw_obj_header *oh, int ac, char *av[]) switch (tcmd) { case TOK_TYPE: NEED1("table type required"); + /* Type may have suboptions after ':' */ + if ((p = strchr(*av, ':')) != NULL) + *p++ = '\0'; val = match_token(tabletypes, *av); - if (val != -1) { - xi.type = val; - ac--; av++; - break; + if (val == -1) { + concat_tokens(tbuf, sizeof(tbuf), tabletypes, + ", "); + errx(EX_USAGE, + "Unknown tabletype: %s. Supported: %s", + *av, tbuf); } - concat_tokens(tbuf, sizeof(tbuf), tabletypes, ", "); - errx(EX_USAGE, "Unknown tabletype: %s. 
Supported: %s", - *av, tbuf); + xi.type = val; + if (p != NULL) { + error = table_parse_type(val, p, &xi.tflags); + if (error != 0) + errx(EX_USAGE, + "Unsupported suboptions: %s", p); + } + ac--; av++; break; case TOK_VALTYPE: NEED1("table value type required"); @@ -408,15 +473,15 @@ table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i) static int table_show_info(ipfw_xtable_info *i, void *arg) { - const char *ttype, *vtype; + const char *vtype; + char ttype[64]; - printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); - if ((ttype = match_value(tabletypes, i->type)) == NULL) - ttype = "unknown"; + table_print_type(ttype, sizeof(ttype), i->type, i->tflags); if ((vtype = match_value(tablevaltypes, i->vtype)) == NULL) vtype = "unknown"; - printf(" type: %s, kindex: %d\n", ttype, i->kidx); + printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); + printf(" kindex: %d, type: %s\n", i->kidx, ttype); printf(" valtype: %s, references: %u\n", vtype, i->refcnt); printf(" algorithm: %s\n", i->algoname); printf(" items: %u, size: %u\n", i->count, i->size); @@ -575,12 +640,15 @@ table_lookup(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_obj_tentry xtent; ipfw_xtable_info xi; + char key[64]; int error; if (ac == 0) errx(EX_USAGE, "address required"); - error = table_do_lookup(oh, *av, &xi, &xtent); + strlcpy(key, *av, sizeof(key)); + + error = table_do_lookup(oh, key, &xi, &xtent); switch (error) { case 0: @@ -600,12 +668,17 @@ table_lookup(ipfw_obj_header *oh, int ac, char *av[]) } static void -tentry_fill_key_type(char *arg, ipfw_obj_tentry *tentry, uint8_t type) +tentry_fill_key_type(char *arg, ipfw_obj_tentry *tentry, uint8_t type, + uint8_t tflags) { - char *p; + char *p, *pp; int mask, af; - struct in6_addr *paddr; + struct in6_addr *paddr, tmp; + struct tflow_entry *tfe; uint32_t key, *pkey; + uint16_t port; + struct protoent *pent; + struct servent *sent; int masklen; masklen = 0; @@ -664,6 +737,117 @@ tentry_fill_key_type(char *arg, ipfw_obj_tentry 
*tentry, uint8_t type) *pkey = key; masklen = 32; break; + case IPFW_TABLE_FLOW: + /* Assume [src-ip][,proto][,src-port][,dst-ip][,dst-port] */ + tfe = &tentry->k.flow; + af = 0; + + /* Handle */ + if ((tflags & IPFW_TFFLAG_SRCIP) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + /* Determine family using temporary storage */ + if (inet_pton(AF_INET, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET) + errx(EX_DATAERR, + "Inconsistent address family\n"); + af = AF_INET; + memcpy(&tfe->a.a4.sip, &tmp, 4); + } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET6) + errx(EX_DATAERR, + "Inconsistent address family\n"); + af = AF_INET6; + memcpy(&tfe->a.a6.sip6, &tmp, 16); + } + + arg = p; + } + + /* Handle */ + if ((tflags & IPFW_TFFLAG_PROTO) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + key = strtol(arg, &pp, 10); + if (*pp != '\0') { + if ((pent = getprotobyname(arg)) == NULL) + errx(EX_DATAERR, "Unknown proto: %s", + arg); + else + key = pent->p_proto; + } + + if (key > 255) + errx(EX_DATAERR, "Bad protocol number: %u",key); + + tfe->proto = key; + + arg = p; + } + + /* Handle */ + if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + if ((port = htons(strtol(arg, NULL, 10))) == 0) { + if ((sent = getservbyname(arg, NULL)) == NULL) + errx(EX_DATAERR, "Unknown service: %s", + arg); + else + key = sent->s_port; + } + + tfe->sport = port; + + arg = p; + } + + /* Handle */ + if ((tflags & IPFW_TFFLAG_DSTIP) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + /* Determine family using temporary storage */ + if (inet_pton(AF_INET, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET) + errx(EX_DATAERR, + "Inconsistent address family"); + af = AF_INET; + memcpy(&tfe->a.a4.dip, &tmp, 4); + } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET6) + errx(EX_DATAERR, + "Inconsistent address family"); + af = AF_INET6; + 
memcpy(&tfe->a.a6.dip6, &tmp, 16); + } + + arg = p; + } + + /* Handle */ + if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + if ((port = htons(strtol(arg, NULL, 10))) == 0) { + if ((sent = getservbyname(arg, NULL)) == NULL) + errx(EX_DATAERR, "Unknown service: %s", + arg); + else + key = sent->s_port; + } + + tfe->dport = port; + + arg = p; + } + + tfe->af = af; + + break; + default: errx(EX_DATAERR, "Unsupported table type: %d", type); } @@ -676,11 +860,12 @@ static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, uint8_t *ptype, uint8_t *pvtype, ipfw_xtable_info *xi) { - uint8_t type, vtype; + uint8_t type, tflags, vtype; int error; char *del; type = 0; + tflags = 0; vtype = 0; error = table_get_info(oh, xi); @@ -688,6 +873,7 @@ tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, if (error == 0) { /* Table found. */ type = xi->type; + tflags = xi->tflags; vtype = xi->vtype; } else { if (error != ESRCH) @@ -718,7 +904,7 @@ tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, *del = '/'; } - tentry_fill_key_type(key, tent, type); + tentry_fill_key_type(key, tent, type, tflags); *ptype = type; *pvtype = vtype; @@ -874,41 +1060,75 @@ table_show_list(ipfw_obj_header *oh, int need_header) static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent) { - char tbuf[128]; + char *comma, tbuf[128], pval[32]; + void *paddr; uint32_t tval; + struct tflow_entry *tfe; tval = tent->value; + if (co.do_value_as_ip) { + tval = htonl(tval); + inet_ntop(AF_INET, &tval, pval, sizeof(pval)); + } else + snprintf(pval, sizeof(pval), "%u", tval); + switch (i->type) { case IPFW_TABLE_CIDR: /* IPv4 or IPv6 prefixes */ inet_ntop(tent->subtype, &tent->k, tbuf, sizeof(tbuf)); - - if (co.do_value_as_ip) { - tval = htonl(tval); - printf("%s/%u %s\n", tbuf, tent->masklen, - inet_ntoa(*(struct in_addr *)&tval)); - } else - printf("%s/%u %u\n", tbuf, tent->masklen, tval); + 
printf("%s/%u %s\n", tbuf, tent->masklen, pval); break; case IPFW_TABLE_INTERFACE: /* Interface names */ - if (co.do_value_as_ip) { - tval = htonl(tval); - printf("%s %s\n", tent->k.iface, - inet_ntoa(*(struct in_addr *)&tval)); - } else - printf("%s %u\n", tent->k.iface, tval); + printf("%s %s\n", tent->k.iface, pval); break; case IPFW_TABLE_NUMBER: /* numbers */ - if (co.do_value_as_ip) { - tval = htonl(tval); - printf("%u %s\n", tent->k.key, - inet_ntoa(*(struct in_addr *)&tval)); - } else - printf("%u %u\n", tent->k.key, tval); + printf("%u %s\n", tent->k.key, pval); break; + case IPFW_TABLE_FLOW: + /* flows */ + tfe = &tent->k.flow; + comma = ""; + + if ((i->tflags & IPFW_TFFLAG_SRCIP) != 0) { + if (tfe->af == AF_INET) + paddr = &tfe->a.a4.sip; + else + paddr = &tfe->a.a6.sip6; + + inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); + printf("%s%s", comma, tbuf); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_PROTO) != 0) { + printf("%s%d", comma, tfe->proto); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_SRCPORT) != 0) { + printf("%s%d", comma, ntohs(tfe->sport)); + comma = ","; + } + if ((i->tflags & IPFW_TFFLAG_DSTIP) != 0) { + if (tfe->af == AF_INET) + paddr = &tfe->a.a4.dip; + else + paddr = &tfe->a.a6.dip6; + + inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); + printf("%s%s", comma, tbuf); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_DSTPORT) != 0) { + printf("%s%d", comma, ntohs(tfe->dport)); + comma = ","; + } + + printf(" %s\n", pval); } } diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 057c1c6534c1..a1459aed6ae5 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -262,6 +262,7 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_DSCP, /* 2 u32 = DSCP mask */ O_SETDSCP, /* arg1=DSCP value */ + O_IP_FLOW_LOOKUP, /* arg1=table number, u32=value */ O_LAST_OPCODE /* not an opcode! 
*/ }; @@ -675,7 +676,8 @@ struct _ipfw_dyn_rule { #define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */ #define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ #define IPFW_TABLE_NUMBER 3 /* Table for holding ports/uid/gid/etc */ -#define IPFW_TABLE_MAXTYPE 3 /* Maximum valid number */ +#define IPFW_TABLE_FLOW 4 /* Table for holding flow data */ +#define IPFW_TABLE_MAXTYPE 4 /* Maximum valid number */ #define IPFW_VTYPE_U32 1 /* Skipto/tablearg integer */ #define IPFW_VTYPE_IP 2 /* Nexthop IP address */ @@ -743,6 +745,25 @@ typedef struct _ipfw_obj_ntlv { char name[64]; /* Null-terminated name */ } ipfw_obj_ntlv; +/* IPv4/IPv6 L4 flow description */ +struct tflow_entry { + uint8_t af; + uint8_t proto; + uint16_t spare; + uint16_t sport; + uint16_t dport; + union { + struct { + struct in_addr sip; + struct in_addr dip; + } a4; + struct { + struct in6_addr sip6; + struct in6_addr dip6; + } a6; + } a; +}; + /* Table entry TLV */ typedef struct _ipfw_obj_tentry { ipfw_obj_tlv head; /* TLV header */ @@ -753,10 +774,11 @@ typedef struct _ipfw_obj_tentry { uint64_t spare; union { /* Longest field needs to be aligned by 8-byte boundary */ - struct in_addr addr; /* IPv4 address */ - uint32_t key; /* uid/gid/port */ - struct in6_addr addr6; /* IPv6 address */ + struct in_addr addr; /* IPv4 address */ + uint32_t key; /* uid/gid/port */ + struct in6_addr addr6; /* IPv6 address */ char iface[IF_NAMESIZE]; /* interface name */ + struct tflow_entry flow; } k; } ipfw_obj_tentry; #define IPFW_TF_UPDATE 0x01 /* Update record if exists */ @@ -776,19 +798,44 @@ typedef struct _ipfw_obj_ctlv { uint8_t spare; } ipfw_obj_ctlv; +typedef struct _ifpw_ta_tinfo { + uint32_t flags; /* Format flags */ + uint8_t taclass; /* algorithm class */ + uint8_t spare0; + uint16_t spare1; + uint32_t rssize4; /* runtime structure size */ + uint32_t rcount4; /* number of items in runtime */ + uint32_t rsize4; /* item size in runtime */ + uint32_t rssize6; /* runtime structure 
size */ + uint32_t rcount6; /* number of items in runtime */ + uint32_t rsize6; /* item size in runtime */ +} ifpw_ta_tinfo; +#define IPFW_TACLASS_HASH 1 /* algo is based on hash */ +#define IPFW_TACLASS_ARRAY 2 /* algo is based on array */ +#define IPFW_TACLASS_RADIX 3 /* algo is based on radix tree */ + +#define IPFW_TATFLAGS_DATA 0x0001 /* Has data filled in */ +#define IPFW_TATFLAGS_AF 0x0002 /* Separate data per AF */ + typedef struct _ipfw_xtable_info { uint8_t type; /* table type (cidr,iface,..) */ + uint8_t tflags; /* type flags */ uint8_t ftype; /* table value format type */ uint8_t vtype; /* value type */ - uint16_t spare0; uint32_t set; /* set table is in */ uint32_t kidx; /* kernel index */ uint32_t refcnt; /* number of references */ uint32_t count; /* Number of records */ - uint32_t size; /* Total size of records */ + uint32_t size; /* Total size of records(export)*/ char tablename[64]; /* table name */ - char algoname[32]; /* algorithm name */ + char algoname[64]; /* algorithm name */ + ifpw_ta_tinfo ta_info; /* additional algo stats */ } ipfw_xtable_info; +#define IPFW_TFFLAG_SRCIP 0x01 +#define IPFW_TFFLAG_DSTIP 0x02 +#define IPFW_TFFLAG_SRCPORT 0x04 +#define IPFW_TFFLAG_DSTPORT 0x08 +#define IPFW_TFFLAG_PROTO 0x10 typedef struct _ipfw_iface_info { char ifname[64]; /* interface name */ @@ -801,7 +848,7 @@ typedef struct _ipfw_iface_info { #define IPFW_IFFLAG_RESOLVED 0x01 /* Interface exists */ typedef struct _ipfw_ta_info { - char algoname[32]; /* algorithm name */ + char algoname[64]; /* algorithm name */ uint32_t type; /* lookup type */ uint32_t flags; uint32_t refcnt; diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 44a82d2234b4..5415da82ce37 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -1522,6 +1522,17 @@ do { \ } break; + case O_IP_FLOW_LOOKUP: + { + uint32_t v = 0; + match = ipfw_lookup_table_extended(chain, + cmd->arg1, 0, &args->f_id, &v); + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + 
match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + if (match) + tablearg = v; + } + break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index ef373fc775f4..283f50d1d43e 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -1011,6 +1011,17 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) goto bad_size; ci->table_opcodes++; break; + case O_IP_FLOW_LOOKUP: + if (cmd->arg1 >= V_fw_tables_max) { + printf("ipfw: invalid table number %d\n", + cmd->arg1); + return (EINVAL); + } + if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + ci->table_opcodes++; + break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; @@ -1726,7 +1737,7 @@ ipfw_ctl3(struct sockopt *sopt) size_t bsize_max, size, valsize; struct ip_fw_chain *chain; uint32_t opt; - char xbuf[128]; + char xbuf[256]; struct sockopt_data sdata; ip_fw3_opheader *op3 = NULL; diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index c8260b80d826..3809c3e3c92a 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -77,7 +77,8 @@ struct table_config { struct named_object no; uint8_t vtype; /* format table type */ uint8_t linked; /* 1 if already linked */ - uint16_t spare; + uint8_t tflags; /* type flags */ + uint8_t spare; uint32_t count; /* Number of records */ uint64_t flags; /* state flags */ char tablename[64]; /* table name */ @@ -95,11 +96,12 @@ struct tables_config { static struct table_config *find_table(struct namedobj_instance *ni, struct tid_info *ti); static struct table_config *alloc_table_config(struct ip_fw_chain *ch, - struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t vtype); + struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags, + uint8_t vtype); static void free_table_config(struct namedobj_instance 
*ni, struct table_config *tc); static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, - char *aname, uint8_t vtype); + char *aname, uint8_t tflags, uint8_t vtype); static void link_table(struct ip_fw_chain *chain, struct table_config *tc); static void unlink_table(struct ip_fw_chain *chain, struct table_config *tc); static void free_table_state(void **state, void **xstate, uint8_t type); @@ -169,7 +171,7 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); - error = create_table_internal(ch, ti, NULL, IPFW_VTYPE_U32); + error = create_table_internal(ch, ti, NULL, 0, IPFW_VTYPE_U32); if (error != 0) return (error); @@ -533,8 +535,7 @@ ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct table_algo *ta; struct table_info *kti; struct namedobj_instance *ni; - int error, plen; - void *paddr; + int error; size_t sz; /* Check minimum header size */ @@ -571,41 +572,13 @@ ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, return (EINVAL); } - /* Check lookup key for validness */ - plen = 0; - paddr = &tent->k; - switch (ti.type) - { - case IPFW_TABLE_CIDR: - if (tent->subtype == AF_INET) - plen = sizeof(struct in_addr); - else if (tent->subtype == AF_INET6) - plen = sizeof(struct in6_addr); - else { - IPFW_UH_RUNLOCK(ch); - return (EINVAL); - } - break; - case IPFW_TABLE_INTERFACE: - /* Check key first */ - plen = sizeof(tent->k.iface); - if (strnlen(tent->k.iface, plen) == plen) { - IPFW_UH_RUNLOCK(ch); - return (EINVAL); - } - case IPFW_TABLE_NUMBER: - plen = sizeof(uint32_t); - break; - - break; - default: - IPFW_UH_RUNLOCK(ch); - return (ENOTSUP); - } kti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; - error = ta->find_tentry(tc->astate, kti, paddr, plen, tent); + if (ta->find_tentry == NULL) + return (ENOTSUP); + + error = ta->find_tentry(tc->astate, kti, tent); IPFW_UH_RUNLOCK(ch); @@ -651,9 +624,10 @@ flush_table(struct ip_fw_chain *ch, 
struct tid_info *ti) struct table_algo *ta; struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; - char algostate[32], *pstate; + char algostate[64], *pstate; int error; uint16_t kidx; + uint8_t tflags; /* * Stage 1: save table algoritm. @@ -674,13 +648,14 @@ flush_table(struct ip_fw_chain *ch, struct tid_info *ti) pstate = algostate; } else pstate = NULL; + tflags = tc->tflags; IPFW_UH_WUNLOCK(ch); /* * Stage 2: allocate new table instance using same algo. */ memset(&ti_new, 0, sizeof(struct table_info)); - if ((error = ta->init(ch, &astate_new, &ti_new, pstate)) != 0) { + if ((error = ta->init(ch, &astate_new, &ti_new, pstate, tflags)) != 0) { IPFW_UH_WLOCK(ch); tc->no.refcnt--; IPFW_UH_WUNLOCK(ch); @@ -1211,7 +1186,7 @@ ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, } IPFW_UH_RUNLOCK(ch); - return (create_table_internal(ch, &ti, aname, i->vtype)); + return (create_table_internal(ch, &ti, aname, i->tflags, i->vtype)); } /* @@ -1224,7 +1199,7 @@ ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, */ static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, - char *aname, uint8_t vtype) + char *aname, uint8_t tflags, uint8_t vtype) { struct namedobj_instance *ni; struct table_config *tc; @@ -1237,7 +1212,7 @@ create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, if (ta == NULL) return (ENOTSUP); - if ((tc = alloc_table_config(ch, ti, ta, aname, vtype)) == NULL) + if ((tc = alloc_table_config(ch, ti, ta, aname, tflags, vtype)) == NULL) return (ENOMEM); IPFW_UH_WLOCK(ch); @@ -1311,6 +1286,7 @@ export_table_info(struct ip_fw_chain *ch, struct table_config *tc, struct table_info *ti; i->type = tc->no.type; + i->tflags = tc->tflags; i->vtype = tc->vtype; i->set = tc->no.set; i->kidx = tc->no.kidx; @@ -1605,6 +1581,10 @@ find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) return (&cidr_radix); case IPFW_TABLE_INTERFACE: return (&iface_idx); + case 
IPFW_TABLE_NUMBER: + return (&number_array); + case IPFW_TABLE_FLOW: + return (&flow_hash); } return (NULL); @@ -1776,6 +1756,11 @@ classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) *puidx = cmdif->p.glob; skip = 0; break; + case O_IP_FLOW_LOOKUP: + *puidx = cmd->arg1; + *ptype = IPFW_TABLE_FLOW; + skip = 0; + break; } return (skip); @@ -1803,6 +1788,9 @@ update_table_opcode(ipfw_insn *cmd, uint16_t idx) cmdif = (ipfw_insn_if *)cmd; cmdif->p.glob = idx; break; + case O_IP_FLOW_LOOKUP: + cmd->arg1 = idx; + break; } } @@ -1906,7 +1894,7 @@ find_table(struct namedobj_instance *ni, struct tid_info *ti) static struct table_config * alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, - struct table_algo *ta, char *aname, uint8_t vtype) + struct table_algo *ta, char *aname, uint8_t tflags, uint8_t vtype) { char *name, bname[16]; struct table_config *tc; @@ -1930,6 +1918,7 @@ alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, tc->no.name = tc->tablename; tc->no.type = ti->type; tc->no.set = set; + tc->tflags = tflags; tc->ta = ta; strlcpy(tc->tablename, name, sizeof(tc->tablename)); /* Set default value type to u32 for compability reasons */ @@ -1944,7 +1933,7 @@ alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, } /* Preallocate data structures for new tables */ - error = ta->init(ch, &tc->astate, &tc->ti, aname); + error = ta->init(ch, &tc->astate, &tc->ti, aname, tflags); if (error != 0) { free(tc, M_IPFW); return (NULL); @@ -2285,7 +2274,7 @@ ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, error = ENOTSUP; goto free; } - tc = alloc_table_config(chain, &ti, ta, NULL, + tc = alloc_table_config(chain, &ti, ta, NULL, 0, IPFW_VTYPE_U32); if (tc == NULL) { diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h index 37e724b05aca..e904d6417be7 100644 --- a/sys/netpfil/ipfw/ip_fw_table.h +++ b/sys/netpfil/ipfw/ip_fw_table.h @@ -63,7 +63,7 @@ struct tentry_info { #define TEI_FLAGS_COMPAT 0x04 /* 
Called from old ABI */ typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data); + struct table_info *ti, char *data, uint8_t tflags); typedef void (ta_destroy)(void *ta_state, struct table_info *ti); typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); @@ -92,8 +92,10 @@ typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); -typedef int ta_find_tentry(void *ta_state, struct table_info *ti, void *key, - uint32_t keylen, ipfw_obj_tentry *tent); +typedef int ta_find_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +typedef int ta_dump_tinfo(void *ta_state, struct table_info *ti, + ifpw_ta_tinfo *tinfo); struct table_algo { char name[16]; @@ -108,21 +110,23 @@ struct table_algo { ta_add *add; ta_del *del; ta_flush_entry *flush_entry; + ta_find_tentry *find_tentry; ta_prepare_mod *prepare_mod; ta_fill_mod *fill_mod; ta_modify *modify; ta_flush_mod *flush_mod; + ta_change_ti *change_ti; ta_foreach *foreach; ta_dump_tentry *dump_tentry; ta_print_config *print_config; - ta_find_tentry *find_tentry; - ta_change_ti *change_ti; + ta_dump_tinfo *dump_tinfo; }; int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx); void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx); -extern struct table_algo cidr_radix, iface_idx; + +extern struct table_algo cidr_radix, iface_idx, number_array, flow_hash; void ipfw_table_algo_init(struct ip_fw_chain *chain); void ipfw_table_algo_destroy(struct ip_fw_chain *chain); diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 3235f08240e5..6929b176924d 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -149,7 +149,7 @@ ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, */ 
static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data) + char *data, uint8_t tflags) { if (!rn_inithead(&ti->state, OFF_LEN_INET)) @@ -221,23 +221,23 @@ ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, } static int -ta_find_radix_tentry(void *ta_state, struct table_info *ti, void *key, - uint32_t keylen, ipfw_obj_tentry *tent) +ta_find_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) { struct radix_node_head *rnh; void *e; e = NULL; - if (keylen == sizeof(in_addr_t)) { + if (tent->subtype == AF_INET) { struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; - sa.sin_addr.s_addr = *((in_addr_t *)key); + sa.sin_addr.s_addr = tent->k.addr.s_addr; rnh = (struct radix_node_head *)ti->state; e = rnh->rnh_matchaddr(&sa, rnh); } else { struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; - memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); + memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; e = rnh->rnh_matchaddr(&sa6, rnh); } @@ -817,6 +817,17 @@ ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, snprintf(buf, bufsize, "%s", "cidr:hash"); } +static int +log2(uint32_t v) +{ + uint32_t r; + + r = 0; + while (v >>= 1) + r++; + + return (r); +} /* * New table. 
@@ -825,10 +836,10 @@ ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, */ static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data) + char *data, uint8_t tflags) { int error, i; - int v4, v6; + uint32_t hsize; struct chash_cfg *ccfg; ccfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); @@ -841,10 +852,8 @@ ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, return (error); } - v4 = 6; - v6 = 6; - ccfg->size4 = 1 << v4; - ccfg->size6 = 1 << v6; + ccfg->size4 = 128; + ccfg->size6 = 128; ccfg->head4 = malloc(sizeof(struct chashbhead) * ccfg->size4, M_IPFW, M_WAITOK | M_ZERO); @@ -861,18 +870,19 @@ ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, ti->xstate = ccfg->head6; /* Store data depending on v6 mask length */ + hsize = log2(ccfg->size4) << 8 | log2(ccfg->size6); if (ccfg->mask6 == 64) { - ti->data = (32 - ccfg->mask4) << 24 | (128 - ccfg->mask6) << 16 | - v4 << 8 | v6; + ti->data = (32 - ccfg->mask4) << 24 | (128 - ccfg->mask6) << 16| + hsize; ti->lookup = ta_lookup_chash_64; } else if ((ccfg->mask6 % 8) == 0) { ti->data = (32 - ccfg->mask4) << 24 | - ccfg->mask6 << 13 | v4 << 8 | v6; + ccfg->mask6 << 13 | hsize; ti->lookup = ta_lookup_chash_aligned; } else { /* don't do that! 
*/ ti->data = (32 - ccfg->mask4) << 24 | - ccfg->mask6 << 16 | v4 << 8 | v6; + ccfg->mask6 << 16 | hsize; ti->lookup = ta_lookup_chash_slow; } @@ -986,8 +996,8 @@ tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) static int -ta_find_chash_tentry(void *ta_state, struct table_info *ti, void *key, - uint32_t keylen, ipfw_obj_tentry *tent) +ta_find_chash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) { struct chash_cfg *ccfg; struct chashbhead *head; @@ -1001,8 +1011,8 @@ ta_find_chash_tentry(void *ta_state, struct table_info *ti, void *key, memset(&ent, 0, sizeof(ent)); memset(&tei, 0, sizeof(tei)); - if (keylen == sizeof(in_addr_t)) { - tei.paddr = key; + if (tent->subtype == AF_INET) { + tei.paddr = &tent->k.addr; tei.masklen = ccfg->mask4; tei.subtype = AF_INET; @@ -1020,7 +1030,7 @@ ta_find_chash_tentry(void *ta_state, struct table_info *ti, void *key, return (0); } } else { - tei.paddr = key; + tei.paddr = &tent->k.addr6; tei.masklen = ccfg->mask6; tei.subtype = AF_INET6; @@ -1330,6 +1340,9 @@ ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, ccfg->size6 = mi->size; } + ti->data = (ti->data & 0xFFFFFFFF00000000) | log2(ccfg->size4) << 8 | + log2(ccfg->size6); + mi->main_ptr = old_head; return (0); @@ -1539,7 +1552,7 @@ ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data) + char *data, uint8_t tflags) { struct iftable_cfg *icfg; @@ -1949,15 +1962,15 @@ ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, } static int -ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, void *key, - uint32_t keylen, ipfw_obj_tentry *tent) +ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) { struct iftable_cfg *icfg; struct ifentry *ife; char *ifname; icfg = (struct iftable_cfg *)ta_state; - ifname = (char *)key; + ifname = tent->k.iface; if 
(strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); @@ -2098,7 +2111,7 @@ ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data) + char *data, uint8_t tflags) { struct numarray_cfg *cfg; @@ -2335,15 +2348,15 @@ ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, } static int -ta_find_numarray_tentry(void *ta_state, struct table_info *ti, void *key, - uint32_t keylen, ipfw_obj_tentry *tent) +ta_find_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) { struct numarray_cfg *cfg; struct numarray *ri; cfg = (struct numarray_cfg *)ta_state; - ri = numarray_find(ti, key); + ri = numarray_find(ti, &tent->k.key); if (ri != NULL) { ta_dump_numarray_tentry(ta_state, ti, ri, tent); @@ -2387,6 +2400,673 @@ struct table_algo number_array = { .flush_mod = ta_flush_mod_numarray, }; +/* + * flow:hash cmds + * + * + * ti->data: + * number of hash buckets (cfg->size) + * + * ti->state: + * bucket array (cfg->head) + * + * ti->xstate: + * IPv4 field mask (cfg->fe4), immediately + * followed by the IPv6 field mask (cfg->fe6) + * + * + * pflags: + * new bucket count requested by ta_add_fhash() + * when the table has to grow (cfg->size * 2) + */ + +struct fhashentry; + +SLIST_HEAD(fhashbhead, fhashentry); + +/* +struct tflow_entry { + uint8_t af; + uint8_t proto; + uint16_t spare; + uint16_t dport; + uint16_t sport; + union { + struct { + struct in_addr sip; + struct in_addr dip; + } a4; + struct { + struct in6_addr sip6; + struct in6_addr dip6; + } a6; + } a; +}; +*/ + +struct fhashentry { + SLIST_ENTRY(fhashentry) next; + uint8_t af; + uint8_t proto; + uint16_t spare0; + uint16_t dport; + uint16_t sport; + uint32_t value; + uint32_t spare1; +}; + +struct fhashentry4 { + struct fhashentry e; + struct in_addr dip; + struct in_addr sip; +}; + +struct fhashentry6 { + struct fhashentry e; + struct in6_addr dip6; + struct 
in6_addr sip6; +}; + +struct fhash_cfg { + struct 
fhashbhead *head; + size_t size; + size_t items; + struct fhashentry4 fe4; + struct fhashentry6 fe6; +}; + +static __inline int +cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) +{ + uint64_t *ka, *kb; + + ka = (uint64_t *)(&a->next + 1); + kb = (uint64_t *)(&b->next + 1); + + if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) + return (1); + + return (0); +} + +static __inline uint32_t +hash_flow4(struct fhashentry4 *f, int hsize) +{ + uint32_t i; + + i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static __inline uint32_t +hash_flow6(struct fhashentry6 *f, int hsize) +{ + uint32_t i; + + i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ + (f->dip6.__u6_addr.__u6_addr32[3]) ^ + (f->sip6.__u6_addr.__u6_addr32[2]) ^ + (f->sip6.__u6_addr.__u6_addr32[3]) ^ + (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static uint32_t +hash_flow_ent(struct fhashentry *ent, uint32_t size) +{ + uint32_t hash; + + if (ent->af == AF_INET) { + hash = hash_flow4((struct fhashentry4 *)ent, size); + } else { + hash = hash_flow6((struct fhashentry6 *)ent, size); + } + + return (hash); +} + +static int +ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct fhashbhead *head; + struct fhashentry *ent; + struct fhashentry4 *m4; + struct ipfw_flow_id *id; + uint32_t hash, hsize; /* bucket count can reach 65536 */ + + id = (struct ipfw_flow_id *)key; + head = (struct fhashbhead *)ti->state; + hsize = ti->data; + m4 = (struct fhashentry4 *)ti->xstate; + + if (id->addr_type == 4) { + struct fhashentry4 f; + + /* Copy hash mask */ + f = *m4; + + f.dip.s_addr &= id->dst_ip; + f.sip.s_addr &= id->src_ip; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow4(&f, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { + *val = ent->value; + return (1); + } + } + } else if (id->addr_type == 6) { + 
struct fhashentry6 f; + 
uint64_t *fp, *idp; + + /* Copy hash mask */ + f = *((struct fhashentry6 *)(m4 + 1)); + + /* Handle lack of __u6_addr.__u6_addr64 */ + fp = (uint64_t *)&f.dip6; + idp = (uint64_t *)&id->dst_ip6; + /* src IPv6 is stored after dst IPv6 */ + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp &= *idp; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow6(&f, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +/* + * New table. + */ +static int +ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int i; + struct fhash_cfg *cfg; + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + + cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 512; + + cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size; i++) + SLIST_INIT(&cfg->head[i]); + + /* Fill in fe masks based on @tflags */ + fe4 = &cfg->fe4; + fe6 = &cfg->fe6; + if (tflags & IPFW_TFFLAG_SRCIP) { + memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); + memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); + } + if (tflags & IPFW_TFFLAG_DSTIP) { + memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); + memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); + } + if (tflags & IPFW_TFFLAG_SRCPORT) { + memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); + memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); + } + if (tflags & IPFW_TFFLAG_DSTPORT) { + memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); + memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); + } + if (tflags & IPFW_TFFLAG_PROTO) { + memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); + memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); + } + + fe4->e.af = AF_INET; + fe6->e.af = AF_INET6; + + *ta_state = cfg; + ti->state = cfg->head; + ti->xstate = &cfg->fe4; + ti->data = cfg->size; + 
ti->lookup = ta_lookup_fhash; + + return (0); +} + +static void +ta_destroy_fhash(void *ta_state, struct table_info *ti) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + free(ent, M_IPFW_TBL); + + free(cfg->head, M_IPFW); + free(cfg, M_IPFW); +} + +static int +ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent; + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + struct tflow_entry *tfe; + + cfg = (struct fhash_cfg *)ta_state; + ent = (struct fhashentry *)e; + tfe = &tent->k.flow; + + tfe->af = ent->af; + tfe->proto = ent->proto; + tfe->dport = htons(ent->dport); + tfe->sport = htons(ent->sport); + tent->value = ent->value; + tent->subtype = ent->af; + + if (ent->af == AF_INET) { + fe4 = (struct fhashentry4 *)ent; + tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); + tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); + tent->masklen = 32; +#ifdef INET6 + } else { + fe6 = (struct fhashentry6 *)ent; + tfe->a.a6.sip6 = fe6->sip6; + tfe->a.a6.dip6 = fe6->dip6; + tent->masklen = 128; +#endif + } + + return (0); +} + +static int +tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) +{ + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + struct tflow_entry *tfe; + + tfe = (struct tflow_entry *)tei->paddr; + + ent->af = tei->subtype; + ent->proto = tfe->proto; + ent->value = tei->value; + ent->dport = ntohs(tfe->dport); + ent->sport = ntohs(tfe->sport); + + if (tei->subtype == AF_INET) { +#ifdef INET + fe4 = (struct fhashentry4 *)ent; + fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); + fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + fe6 = (struct fhashentry6 *)ent; + fe6->sip6 = tfe->a.a6.sip6; + fe6->dip6 = tfe->a.a6.dip6; +#endif + } else { + /* 
Unknown address family */ + return (EINVAL); + } + + return (0); +} + + +static int +ta_find_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct fhashentry6 fe6; + struct tentry_info tei; + int error; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + + ent = &fe6.e; + + memset(&fe6, 0, sizeof(fe6)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.flow; + tei.subtype = tent->subtype; + + if ((error = tei_to_fhash_ent(&tei, ent)) != 0) + return (error); + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei.subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + ta_dump_fhash_tentry(ta_state, ti, tmp, tent); + return (0); + } + } + + return (ENOENT); +} + +static void +ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + f(ent, arg); +} + + +struct ta_buf_fhash +{ + void *ent_ptr; + struct fhashentry6 fe6; +}; + +static int +ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + struct fhashentry *ent; + size_t sz; + int error; + + tb = (struct ta_buf_fhash *)ta_buf; + memset(tb, 0, sizeof(struct ta_buf_fhash)); + + if (tei->subtype == AF_INET) + sz = sizeof(struct fhashentry4); + else if (tei->subtype == AF_INET6) + sz = sizeof(struct fhashentry6); + else + return (EINVAL); + + ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO); + + error = tei_to_fhash_ent(tei, ent); + if (error != 0) { + free(ent, M_IPFW_TBL); + return (error); + } + tb->ent_ptr = 
ent; + + return (0); +} + +static int +ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint64_t *pflags, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + int exists; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = (struct fhashentry *)tb->ent_ptr; + exists = 0; + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + exists = 1; + break; + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + tmp->value = tei->value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + SLIST_INSERT_HEAD(&head[hash], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters and check if we need to grow hash */ + cfg->items++; + if (cfg->items > cfg->size && cfg->size < 65536) + *pflags = cfg->size * 2; + } + + return (0); +} + +static int +ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + memset(tb, 0, sizeof(struct ta_buf_fhash)); + + return (tei_to_fhash_ent(tei, &tb->fe6.e)); +} + +static int +ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint64_t *pflags, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = &tb->fe6.e; + + head = 
cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + SLIST_REMOVE(&head[hash], tmp, fhashentry, next); + tb->ent_ptr = tmp; /* freed in flush_entry */ + *pnum = 1; + cfg->items--; + return (0); + } + } + + return (ENOENT); +} + +static void +ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +/* + * Hash growing callbacks. + */ + +/* + * Allocate new, larger fhash. + */ +static int +ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + struct fhashbhead *head; + int i; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + + mi->main_ptr = head; + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + + /* It is not possible to do rehash if we're not holding WLOCK. */ + return (0); +} + + +/* + * Switch old & new arrays. 
+ */ +static int +ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct fhash_cfg *cfg; + struct fhashbhead *old_head, *new_head; + struct fhashentry *ent, *ent_next; + int i; + uint32_t nhash; + size_t old_size; + + mi = (struct mod_item *)ta_buf; + cfg = (struct fhash_cfg *)ta_state; + + /* Check whether the hash still needs to grow */ + old_size = cfg->size; + old_head = ti->state; + + if (old_size >= mi->size) + return (0); + + new_head = (struct fhashbhead *)mi->main_ptr; + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_flow_ent(ent, mi->size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->state = new_head; + ti->data = mi->size; + cfg->head = new_head; + cfg->size = mi->size; + + mi->main_ptr = old_head; + + return (0); +} + +/* + * Free unneeded array. + */ +static void +ta_flush_mod_fhash(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +struct table_algo flow_hash = { + .name = "flow:hash", + .type = IPFW_TABLE_FLOW, + .init = ta_init_fhash, + .destroy = ta_destroy_fhash, + .prepare_add = ta_prepare_add_fhash, + .prepare_del = ta_prepare_del_fhash, + .add = ta_add_fhash, + .del = ta_del_fhash, + .flush_entry = ta_flush_fhash_entry, + .foreach = ta_foreach_fhash, + .dump_tentry = ta_dump_fhash_tentry, + .find_tentry = ta_find_fhash_tentry, + .prepare_mod = ta_prepare_mod_fhash, + .fill_mod = ta_fill_mod_fhash, + .modify = ta_modify_fhash, + .flush_mod = ta_flush_mod_fhash, +}; void ipfw_table_algo_init(struct ip_fw_chain *ch) { @@ -2400,6 +3080,7 @@ ipfw_table_algo_init(struct ip_fw_chain *ch) ipfw_add_table_algo(ch, &cidr_hash, sz, &cidr_hash.idx); ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); + ipfw_add_table_algo(ch, &flow_hash, sz, 
&flow_hash.idx); } void @@ -2410,6 +3091,7 @@ ipfw_table_algo_destroy(struct ip_fw_chain *ch) ipfw_del_table_algo(ch, cidr_hash.idx); ipfw_del_table_algo(ch, iface_idx.idx); ipfw_del_table_algo(ch, number_array.idx); + ipfw_del_table_algo(ch, flow_hash.idx); }