diff --git a/lib/libalias/alias_db.c b/lib/libalias/alias_db.c index 52384b3f6a36..f5b04056eaba 100644 --- a/lib/libalias/alias_db.c +++ b/lib/libalias/alias_db.c @@ -2641,6 +2641,82 @@ PacketAliasCheckNewLink(void) #include #include +#define NEW_IPFW 1 /* use new ipfw code */ +#ifdef NEW_IPFW +/* + * A function to fill simple commands of size 1. + * Existing flags are preserved. + */ +static void +fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, u_int16_t arg) +{ + cmd->opcode = opcode; + cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | 1; + cmd->arg1 = arg; +} + +/* + * helper function, updates the pointer to cmd with the length + * of the current command, and also cleans up the first word of + * the new command in case it has been clobbered before. + */ +static ipfw_insn * +next_cmd(ipfw_insn *cmd) +{ + cmd += F_LEN(cmd); + bzero(cmd, sizeof(*cmd)); + return cmd; +} + +static void +fill_ip(ipfw_insn_ip *cmd, enum ipfw_opcodes opcode, u_int32_t addr) +{ + cmd->o.opcode = opcode; + cmd->o.len = F_INSN_SIZE(ipfw_insn_u32); + cmd->addr.s_addr = addr; +} + +static void +fill_one_port(ipfw_insn_u16 *cmd, enum ipfw_opcodes opcode, u_int16_t port) +{ + cmd->o.opcode = opcode; + cmd->o.len = F_INSN_SIZE(ipfw_insn_u16); + cmd->ports[0] = cmd->ports[1] = port; +} + +static int +fill_rule(void *buf, int bufsize, int rulenum, + enum ipfw_opcodes action, int proto, + struct in_addr sa, u_int16_t sp, struct in_addr da, u_int32_t dp) +{ + struct ip_fw *rule = (struct ip_fw *)buf; + ipfw_insn *cmd = (ipfw_insn *)rule->cmd; + + bzero(buf, bufsize); + rule->rulenum = rulenum; + + fill_cmd(cmd, O_PROTO, 0, proto); + cmd = next_cmd(cmd); + + fill_ip((ipfw_insn_ip *)cmd, O_IP_SRC, sa.s_addr); + cmd = next_cmd(cmd); + + fill_one_port((ipfw_insn_u16 *)cmd, O_IP_SRCPORT, sp); + cmd = next_cmd(cmd); + + fill_ip((ipfw_insn_ip *)cmd, O_IP_DST, da.s_addr); + cmd = next_cmd(cmd); + + fill_one_port((ipfw_insn_u16 *)cmd, O_IP_DSTPORT, dp); + cmd = next_cmd(cmd); + + 
fill_cmd(cmd, O_ACCEPT, 0, 0); + cmd = next_cmd(cmd); + + return ((void *)cmd - buf); +} +#endif /* NEW_IPFW */ + static void ClearAllFWHoles(void); static int fireWallBaseNum; /* The first firewall entry free for our use */ @@ -2724,6 +2800,35 @@ PunchFWHole(struct alias_link *link) { /* Start next search at next position */ fireWallActiveNum = fwhole+1; +#ifdef NEW_IPFW + if (GetOriginalPort(link) != 0 && GetDestPort(link) != 0) { + /* + * generate two rules of the form + * + * add fwhole accept tcp from OAddr OPort to DAddr DPort + * add fwhole accept tcp from DAddr DPort to OAddr OPort + */ + u_int32_t rulebuf[255]; + int i; + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetOriginalAddress(link), ntohs(GetOriginalPort(link)), + GetDestAddress(link), ntohs(GetDestPort(link)) ); + r = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(1) setsockopt(IP_FW_ADD)"); + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetDestAddress(link), ntohs(GetDestPort(link)), + GetOriginalAddress(link), ntohs(GetOriginalPort(link)) ); + r = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)"); + } +#else /* !NEW_IPFW old code to generate ipfw rule */ + /* Build generic part of the two rules */ rule.fw_number = fwhole; IP_FW_SETNSRCP(&rule, 1); /* Number of source ports. */ @@ -2759,6 +2864,7 @@ PunchFWHole(struct alias_link *link) { err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)"); #endif } +#endif /* !NEW_IPFW */ /* Indicate hole applied */ link->data.tcp->fwhole = fwhole; fw_setfield(fireWallField, fwhole); @@ -2770,6 +2876,10 @@ static void ClearFWHole(struct alias_link *link) { if (link->link_type == LINK_TCP) { int fwhole = link->data.tcp->fwhole; /* Where is the firewall hole? 
*/ +#ifdef NEW_IPFW + while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &fwhole, sizeof fwhole)) + ; +#else /* !NEW_IPFW */ struct ip_fw rule; if (fwhole < 0) @@ -2779,7 +2889,9 @@ ClearFWHole(struct alias_link *link) { rule.fw_number = fwhole; while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule)) ; +#endif /* !NEW_IPFW */ fw_clrfield(fireWallField, fwhole); + link->data.tcp->fwhole = -1; } } @@ -2795,9 +2907,15 @@ ClearAllFWHoles(void) { memset(&rule, 0, sizeof rule); for (i = fireWallBaseNum; i < fireWallBaseNum + fireWallNumNums; i++) { +#ifdef NEW_IPFW + int r = i; + while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &r, sizeof r)) + ; +#else /* !NEW_IPFW */ rule.fw_number = i; while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule)) ; +#endif /* NEW_IPFW */ } memset(fireWallField, 0, fireWallNumNums); } diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile index 241d304673af..e49fe99a4adb 100644 --- a/sbin/ipfw/Makefile +++ b/sbin/ipfw/Makefile @@ -1,7 +1,8 @@ # $FreeBSD$ PROG= ipfw -WARNS= 0 +SRCS= ipfw2.c +#WARNS= 0 MAN= ipfw.8 .include diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c new file mode 100644 index 000000000000..5cf462ea00a7 --- /dev/null +++ b/sbin/ipfw/ipfw2.c @@ -0,0 +1,3178 @@ +/* + * Copyright (c) 2002 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * NEW command line interface for IP firewall facility + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include /* def. of struct route */ +#include +#include +#include + +int s, /* main RAW socket */ + do_resolv, /* Would try to resolve all */ + do_acct, /* Show packet/byte count */ + do_time, /* Show time stamps */ + do_quiet, /* Be quiet in add and flush */ + do_force, /* Don't ask for confirmation */ + do_pipe, /* this cmd refers to a pipe */ + do_sort, /* field to sort results (0 = no) */ + do_dynamic, /* display dynamic rules */ + do_expired, /* display expired dynamic rules */ + verbose; + +#define IP_MASK_ALL 0xffffffff + +/* + * structure to hold flag names and associated values to be + * set in the appropriate masks. + * A NULL string terminates the array. + * Often, an element with 0 value contains an error string. + * + */ +struct _s_x { + char *s; + int x; +}; + +static struct _s_x f_tcpflags[] = { + { "syn", TH_SYN }, + { "fin", TH_FIN }, + { "ack", TH_ACK }, + { "psh", TH_PUSH }, + { "rst", TH_RST }, + { "urg", TH_URG }, + { "tcp flag", 0 }, + { NULL, 0 } +}; + +static struct _s_x f_tcpopts[] = { + { "mss", IP_FW_TCPOPT_MSS }, + { "maxseg", IP_FW_TCPOPT_MSS }, + { "window", IP_FW_TCPOPT_WINDOW }, + { "sack", IP_FW_TCPOPT_SACK }, + { "ts", IP_FW_TCPOPT_TS }, + { "timestamp", IP_FW_TCPOPT_TS }, + { "cc", IP_FW_TCPOPT_CC }, + { "tcp option", 0 }, + { NULL, 0 } +}; + +/* + * IP options span the range 0 to 255 so we need to remap them + * (though in fact only the low 5 bits are significant). 
+ */ +static struct _s_x f_ipopts[] = { + { "ssrr", IP_FW_IPOPT_SSRR}, + { "lsrr", IP_FW_IPOPT_LSRR}, + { "rr", IP_FW_IPOPT_RR}, + { "ts", IP_FW_IPOPT_TS}, + { "ip option", 0 }, + { NULL, 0 } +}; + +#if 0 /* XXX not used yet */ +static struct _s_x f_iptos[] = { + { "lowdelay", IPTOS_LOWDELAY}, + { "throughput", IPTOS_THROUGHPUT}, + { "reliability", IPTOS_RELIABILITY}, + { "mincost", IPTOS_MINCOST}, + { "congestion", IPTOS_CE}, + { "ecntransport", IPTOS_ECT}, + { "ip tos option", 0}, + { NULL, 0 } +}; +#endif + +static struct _s_x limit_masks[] = { + {"all", DYN_SRC_ADDR|DYN_SRC_PORT|DYN_DST_ADDR|DYN_DST_PORT}, + {"src-addr", DYN_SRC_ADDR}, + {"src-port", DYN_SRC_PORT}, + {"dst-addr", DYN_DST_ADDR}, + {"dst-port", DYN_DST_PORT}, + {NULL, 0} +}; + +/* + * we use IPPROTO_ETHERTYPE as a fake protocol id to call the print routines + * This is only used in this code. + */ +#define IPPROTO_ETHERTYPE 0x1000 +static struct _s_x ether_types[] = { + /* + * Note, we cannot use "-:&/" in the names because they are field + * separators in the type specifications. Also, we use s = NULL as + * end-delimiter, because a type of 0 can be legal. 
+ */ + { "ip", 0x0800 }, + { "ipv4", 0x0800 }, + { "ipv6", 0x86dd }, + { "arp", 0x0806 }, + { "rarp", 0x8035 }, + { "vlan", 0x8100 }, + { "loop", 0x9000 }, + { "trail", 0x1000 }, + { "at", 0x809b }, + { "atalk", 0x809b }, + { "aarp", 0x80f3 }, + { "pppoe_disc", 0x8863 }, + { "pppoe_sess", 0x8864 }, + { "ipx_8022", 0x00E0 }, + { "ipx_8023", 0x0000 }, + { "ipx_ii", 0x8137 }, + { "ipx_snap", 0x8137 }, + { "ipx", 0x8137 }, + { "ns", 0x0600 }, + { NULL, 0 } +}; + +static void show_usage(void); + +enum tokens { + TOK_NULL=0, + + TOK_OR, + TOK_NOT, + + TOK_ACCEPT, + TOK_COUNT, + TOK_PIPE, + TOK_QUEUE, + TOK_DIVERT, + TOK_TEE, + TOK_FORWARD, + TOK_SKIPTO, + TOK_DENY, + TOK_REJECT, + TOK_RESET, + TOK_UNREACH, + TOK_CHECKSTATE, + + TOK_UID, + TOK_GID, + TOK_IN, + TOK_LIMIT, + TOK_KEEPSTATE, + TOK_LAYER2, + TOK_OUT, + TOK_XMIT, + TOK_RECV, + TOK_VIA, + TOK_FRAG, + TOK_IPOPTS, + TOK_IPLEN, + TOK_IPID, + TOK_IPPRECEDENCE, + TOK_IPTOS, + TOK_IPTTL, + TOK_IPVER, + TOK_ESTAB, + TOK_SETUP, + TOK_TCPFLAGS, + TOK_TCPOPTS, + TOK_TCPSEQ, + TOK_TCPACK, + TOK_TCPWIN, + TOK_ICMPTYPES, + + TOK_PLR, + TOK_BUCKETS, + TOK_DSTIP, + TOK_SRCIP, + TOK_DSTPORT, + TOK_SRCPORT, + TOK_ALL, + TOK_MASK, + TOK_BW, + TOK_DELAY, + TOK_RED, + TOK_GRED, + TOK_DROPTAIL, + TOK_PROTO, + TOK_WEIGHT, +}; + +struct _s_x dummynet_params[] = { + { "plr", TOK_PLR }, + { "buckets", TOK_BUCKETS }, + { "dst-ip", TOK_DSTIP }, + { "src-ip", TOK_SRCIP }, + { "dst-port", TOK_DSTPORT }, + { "src-port", TOK_SRCPORT }, + { "proto", TOK_PROTO }, + { "weight", TOK_WEIGHT }, + { "all", TOK_ALL }, + { "mask", TOK_MASK }, + { "droptail", TOK_DROPTAIL }, + { "red", TOK_RED }, + { "gred", TOK_GRED }, + { "bw", TOK_BW }, + { "bandwidth", TOK_BW }, + { "delay", TOK_DELAY }, + { "queue", TOK_QUEUE }, + { "dummynet-params", TOK_NULL }, + { NULL, 0 } +}; + +struct _s_x rule_actions[] = { + { "accept", TOK_ACCEPT }, + { "pass", TOK_ACCEPT }, + { "allow", TOK_ACCEPT }, + { "permit", TOK_ACCEPT }, + { "count", TOK_COUNT }, + { "pipe", 
TOK_PIPE }, + { "queue", TOK_QUEUE }, + { "divert", TOK_DIVERT }, + { "tee", TOK_TEE }, + { "fwd", TOK_FORWARD }, + { "forward", TOK_FORWARD }, + { "skipto", TOK_SKIPTO }, + { "deny", TOK_DENY }, + { "drop", TOK_DENY }, + { "reject", TOK_REJECT }, + { "reset", TOK_RESET }, + { "check-state", TOK_CHECKSTATE }, + { NULL, TOK_NULL }, + { NULL, 0 } +}; + +struct _s_x rule_options[] = { + { "uid", TOK_UID }, + { "gid", TOK_GID }, + { "in", TOK_IN }, + { "limit", TOK_LIMIT }, + { "keep-state", TOK_KEEPSTATE }, + { "bridged", TOK_LAYER2 }, + { "layer2", TOK_LAYER2 }, + { "out", TOK_OUT }, + { "xmit", TOK_XMIT }, + { "recv", TOK_RECV }, + { "via", TOK_VIA }, + { "fragment", TOK_FRAG }, + { "frag", TOK_FRAG }, + { "ipoptions", TOK_IPOPTS }, + { "ipopts", TOK_IPOPTS }, + { "iplen", TOK_IPLEN }, + { "ipid", TOK_IPID }, + { "ipprecedence", TOK_IPPRECEDENCE }, + { "iptos", TOK_IPTOS }, + { "ipttl", TOK_IPTTL }, + { "ipversion", TOK_IPVER }, + { "ipver", TOK_IPVER }, + { "estab", TOK_ESTAB }, + { "established", TOK_ESTAB }, + { "setup", TOK_SETUP }, + { "tcpflags", TOK_TCPFLAGS }, + { "tcpflgs", TOK_TCPFLAGS }, + { "tcpoptions", TOK_TCPOPTS }, + { "tcpopts", TOK_TCPOPTS }, + { "tcpseq", TOK_TCPSEQ }, + { "tcpack", TOK_TCPACK }, + { "tcpwin", TOK_TCPWIN }, + { "icmptypes", TOK_ICMPTYPES }, + + { "not", TOK_NOT }, /* pseudo option */ + { "!", /* escape ? 
*/ TOK_NOT }, /* pseudo option */ + { "or", TOK_OR }, /* pseudo option */ + { "|", /* escape */ TOK_OR }, /* pseudo option */ + { NULL, TOK_NULL }, + { NULL, 0 } +}; + +/** + * match_token takes a table and a string, returns the value associated + * with the string (0 meaning an error in most cases) + */ +static int +match_token(struct _s_x *table, char *string) +{ + struct _s_x *pt; + int i = strlen(string); + + for (pt = table ; i && pt->s != NULL ; pt++) + if (strlen(pt->s) == i && !bcmp(string, pt->s, i)) + return pt->x; + return -1; +}; + +static char * +match_value(struct _s_x *p, u_int32_t value) +{ + for (; p->s != NULL; p++) + if (p->x == value) + return p->s; + return NULL; +} + +/* + * prints one port, symbolic or numeric + */ +static void +print_port(int proto, u_int16_t port) +{ + + if (proto == IPPROTO_ETHERTYPE) { + char *s; + + if (do_resolv && (s = match_value(ether_types, port)) ) + printf("%s", s); + else + printf("0x%04x", port); + } else { + struct servent *se = NULL; + if (do_resolv) { + struct protoent *pe = getprotobynumber(proto); + + se = getservbyport(htons(port), pe ? pe->p_name : NULL); + } + if (se) + printf("%s", se->s_name); + else + printf("%d", port); + } +} + +/* + * print the values in a list of ports + * XXX todo: add support for mask. + */ +static void +print_newports(ipfw_insn_u16 *cmd, int proto) +{ + u_int16_t *p = cmd->ports; + int i; + char *sep= " "; + + if (cmd->o.len & F_NOT) + printf(" not"); + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf(sep); + print_port(proto, p[0]); + if (p[0] != p[1]) { + printf("-"); + print_port(proto, p[1]); + } + sep = ","; + } +} + +/* + * Like strtol, but also translates service names into port numbers + * for some protocols. + * In particular: + * proto == -1 disables the protocol check; + * proto == IPPROTO_ETHERTYPE looks up an internal table + * proto == matches the values there. 
+ */ +static int +strtoport(char *s, char **end, int base, int proto) +{ + char *s1, sep; + int i; + + if ( *s == '\0') + goto none; + + if (isdigit(*s)) + return strtol(s, end, base); + + /* + * find separator and replace with a '\0' + */ + for (s1 = s; *s1 && isalnum(*s1) ; s1++) + ; + sep = *s1; + *s1 = '\0'; + + if (proto == IPPROTO_ETHERTYPE) { + i = match_token(ether_types, s); + *s1 = sep; + if (i == -1) { /* not found */ + *end = s; + return 0; + } else { + *end = s1; + return i; + } + } else { + struct protoent *pe = NULL; + struct servent *se; + + if (proto != 0) + pe = getprotobynumber(proto); + setservent(1); + se = getservbyname(s, pe ? pe->p_name : NULL); + *s1 = sep; + if (se != NULL) { + *end = s1; + return ntohs(se->s_port); + } + } +none: + *end = s; + return 0; +} + +/* + * fill the body of the command with the list of port ranges. + * At the moment it only understands numeric ranges. + */ +static int +fill_newports(ipfw_insn_u16 *cmd, char *av, int proto) +{ + u_int16_t *p = cmd->ports; + int i = 0; + + for (; *av ; i++, p +=2 ) { + u_int16_t a, b; + char *s; + + a = strtoport(av, &s, 0, proto); + if (s == av) /* no parameter */ + break; + if (*s == '-') { /* a range */ + av = s+1; + b = strtoport(av, &s, 0, proto); + if (s == av) /* no parameter */ + break; + p[0] = a; + p[1] = b; + } else if (*s == ',' || *s == '\0' ) { + p[0] = p[1] = a; + } else /* invalid separator */ + break; + av = s+1; + } + if (i > 0) { + if (i+1 > F_LEN_MASK) + errx(EX_DATAERR, "too many port range\n"); + cmd->o.len |= i+1; /* leave F_NOT and F_OR untouched */ + } + return i; +} + +#if 0 /* XXX not used yet */ +static struct _s_x icmpcodes[] = { + { "net", ICMP_UNREACH_NET }, + { "host", ICMP_UNREACH_HOST }, + { "protocol", ICMP_UNREACH_PROTOCOL }, + { "port", ICMP_UNREACH_PORT }, + { "needfrag", ICMP_UNREACH_NEEDFRAG }, + { "srcfail", ICMP_UNREACH_SRCFAIL }, + { "net-unknown", ICMP_UNREACH_NET_UNKNOWN }, + { "host-unknown", ICMP_UNREACH_HOST_UNKNOWN }, + { "isolated", 
/*
 * Returns the number of bits set (from left) in a contiguous bitmask,
 * or -1 if the mask is not contiguous.
 *
 * The mask is read as a byte string: the first bit is bit 7 of the
 * first byte (note that for MAC addresses the first bit on the wire is
 * bit 0 of the first byte).  `len' is the length of the mask in bits;
 * `p' must hold at least (len + 7) / 8 bytes.
 *
 * NOTE(review): the original body was lost to text extraction; this
 * implementation follows the contract stated above.
 */
static int
contigmask(unsigned char *p, int len)
{
	int i, n;

	/* count the leading run of set bits */
	for (i = 0; i < len; i++)
		if ((p[i / 8] & (1 << (7 - (i % 8)))) == 0)
			break;
	/* any bit set after the first clear one makes the mask invalid */
	for (n = i + 1; n < len; n++)
		if ((p[n / 8] & (1 << (7 - (n % 8)))) != 0)
			return -1;
	return i;
}
+ */ +static void +print_ip(ipfw_insn_ip *cmd) +{ + struct hostent *he = NULL; + int mb; + + printf("%s ", cmd->o.len & F_NOT ? " not": ""); + + if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { + printf("me"); + return; + } + if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { + u_int32_t x, *d; + int i; + char comma = '{'; + + x = cmd->o.arg1 - 1; + x = htonl( ~x ); + cmd->addr.s_addr = htonl(cmd->addr.s_addr); + printf("%s/%d", inet_ntoa(cmd->addr), + contigmask((u_char *)&x, 32)); + x = cmd->addr.s_addr = htonl(cmd->addr.s_addr); + x &= 0xff; /* base */ + d = (u_int32_t *)&(cmd->mask); + for (i=0; i < cmd->o.arg1; i++) + if (d[ i/32] & (1<<(i & 31))) { + printf("%c%d", comma, i+x); + comma = ','; + } + printf("}"); + return; + } + if (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) + mb = 32; + else + mb = contigmask((u_char *)&(cmd->mask.s_addr), 32); + if (mb == 32 && do_resolv) + he = gethostbyaddr((char *)&(cmd->addr.s_addr), + sizeof(u_long), AF_INET); + if (he != NULL) /* resolved to name */ + printf("%s", he->h_name); + else if (mb == 0) /* any */ + printf("any"); + else { /* numeric IP followed by some kind of mask */ + printf("%s", inet_ntoa(cmd->addr)); + if (mb < 0) + printf(":%s", inet_ntoa(cmd->mask)); + else if (mb < 32) + printf("/%d", mb); + } +} + +/* + * prints a MAC address/mask pair + */ +static void +print_mac(u_char *addr, u_char *mask) +{ + int l = contigmask(mask, 48); + + if (l == 0) + printf(" any"); + else { + printf(" %02x:%02x:%02x:%02x:%02x:%02x", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + if (l == -1) + printf("&%02x:%02x:%02x:%02x:%02x:%02x", + mask[0], mask[1], mask[2], + mask[3], mask[4], mask[5]); + else if (l < 48) + printf("/%d", l); + } +} + + +/* + * show_ipfw() prints the body of an ipfw rule. + * Because the standard rule has at least proto src_ip dst_ip, we use + * a helper function to produce these entries if not provided explicitly. 
+ */ +#define HAVE_PROTO 1 +#define HAVE_SRCIP 2 +#define HAVE_DSTIP 4 +#define HAVE_MAC 8 + +static void +show_prerequisites(int *flags, int want) +{ + if ( !(*flags & HAVE_PROTO) && (want & HAVE_PROTO)) + printf(" ip"); + if ( !(*flags & HAVE_SRCIP) && (want & HAVE_SRCIP)) + printf(" from any"); + if ( !(*flags & HAVE_DSTIP) && (want & HAVE_DSTIP)) + printf(" to any"); + *flags |= want; +} + +static void +show_ipfw(struct ip_fw *rule) +{ + int l; + ipfw_insn *cmd; + int proto = 0; /* default */ + int flags = 0; /* prerequisites */ + ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */ + int or_block = 0; /* we are in an or block */ + + printf("%05u ", rule->rulenum); + + if (do_acct) + printf("%10qu %10qu ", rule->pcnt, rule->bcnt); + + if (do_time) { + if (rule->timestamp) { + char timestr[30]; + time_t t = _long_to_time(rule->timestamp); + + strcpy(timestr, ctime(&t)); + *strchr(timestr, '\n') = '\0'; + printf("%s ", timestr); + } else { + printf(" "); + } + } + + /* + * first print actions + */ + for (l = rule->cmd_len - rule->act_ofs, cmd = ACTION_PTR(rule); + l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch(cmd->opcode) { + case O_CHECK_STATE: + printf("check-state"); + /* avoid printing anything else */ + flags = HAVE_PROTO|HAVE_SRCIP|HAVE_DSTIP; + break; + + case O_PROB: + { + ipfw_insn_u32 *p = (ipfw_insn_u32 *)cmd; + double d = 1.0 * p->d[0]; + + d = 1 - (d / 0x7fffffff); + printf("prob %f ", d); + } + break; + + case O_ACCEPT: + printf("allow"); + break; + + case O_COUNT: + printf("count"); + break; + + case O_DENY: + printf("deny"); + break; + + case O_SKIPTO: + printf("skipto %u", cmd->arg1); + break; + + case O_PIPE: + printf("pipe %u", cmd->arg1); + break; + + case O_QUEUE: + printf("queue %u", cmd->arg1); + break; + + case O_DIVERT: + printf("divert %u", cmd->arg1); + break; + + case O_TEE: + printf("tee %u", cmd->arg1); + break; + + case O_FORWARD_IP: + { + ipfw_insn_sa *s = (ipfw_insn_sa *)cmd; + + printf("fwd %s", 
inet_ntoa(s->sa.sin_addr)); + if (s->sa.sin_port) + printf(",%d", ntohs(s->sa.sin_port)); + } + break; + + case O_LOG: /* O_LOG is printed last */ + logptr = (ipfw_insn_log *)cmd; + break; + + default: + printf("** unrecognized action %d len %d", + cmd->opcode, cmd->len); + } + } + if (logptr) { + if (logptr->max_log > 0) + printf(" log logamount %d ", logptr->max_log); + else + printf(" log "); + } + /* + * then print the body + */ + for (l = rule->act_ofs, cmd = rule->cmd ; + l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* useful alias */ + + switch(cmd->opcode) { + case O_PROBE_STATE: + break; /* no need to print anything here */ + + case O_MACADDR2: { + ipfw_insn_mac *m = (ipfw_insn_mac *)cmd; + if ( (flags & HAVE_MAC) == 0) + printf(" MAC"); + flags |= HAVE_MAC; + if (cmd->len & F_NOT) + printf(" not"); + print_mac( m->addr, m->mask); + print_mac( m->addr + 6, m->mask + 6); + } + break; + + case O_MAC_TYPE: + print_newports((ipfw_insn_u16 *)cmd, IPPROTO_ETHERTYPE); + break; + + case O_IP_SRC: + case O_IP_SRC_MASK: + case O_IP_SRC_ME: + case O_IP_SRC_SET: + show_prerequisites(&flags, HAVE_PROTO); + if (!(flags & HAVE_SRCIP)) + printf(" from"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip((ipfw_insn_ip *)cmd); + flags |= HAVE_SRCIP; + break; + + case O_IP_DST: + case O_IP_DST_MASK: + case O_IP_DST_ME: + case O_IP_DST_SET: + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP); + if (!(flags & HAVE_DSTIP)) + printf(" to"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip((ipfw_insn_ip *)cmd); + flags |= HAVE_DSTIP; + break; + + case O_IP_DSTPORT: + show_prerequisites(&flags, + HAVE_PROTO|HAVE_SRCIP|HAVE_DSTIP); + case O_IP_SRCPORT: + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP); + print_newports((ipfw_insn_u16 *)cmd, proto); + break; + + case O_PROTO: { + struct protoent *pe; + + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + if (cmd->len & F_NOT) + printf(" not"); + 
proto = cmd->arg1; + pe = getprotobynumber(cmd->arg1); + if (pe) + printf(" %s", pe->p_name); + else + printf(" %u", cmd->arg1); + } + flags |= HAVE_PROTO; + break; + + default: /*options ... */ + show_prerequisites(&flags, + HAVE_PROTO|HAVE_SRCIP|HAVE_DSTIP); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + if (cmd->len & F_NOT && cmd->opcode != O_IN) + printf(" not"); + switch(cmd->opcode) { + case O_FRAG: + printf(" frag"); + break; + + case O_IN: + printf(cmd->len & F_NOT ? " out" : " in"); + break; + + case O_LAYER2: + printf(" layer2"); + break; + case O_XMIT: + case O_RECV: + case O_VIA: { + char *s; + ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd; + + if (cmd->opcode == O_XMIT) + s = "xmit"; + else if (cmd->opcode == O_RECV) + s = "recv"; + else if (cmd->opcode == O_VIA) + s = "via"; + if (cmdif->name[0] == '\0') + printf(" %s %s", s, inet_ntoa(cmdif->p.ip)); + else if (cmdif->p.unit == -1) + printf(" %s %s*", s, cmdif->name); + else + printf(" %s %s%d", s, cmdif->name, cmdif->p.unit); + } + break; + + case O_IPID: + printf(" ipid %u", cmd->arg1 ); + break; + + case O_IPTTL: + printf(" ipttl %u", cmd->arg1 ); + break; + + case O_IPVER: + printf(" ipver %u", cmd->arg1 ); + break; + + case O_IPLEN: + printf(" iplen %u", cmd->arg1 ); + break; + + case O_IPOPTS: + print_flags("ipoptions", cmd, f_ipopts); + break; + + case O_ESTAB: + printf(" established"); + break; + + case O_TCPFLAGS: + print_flags("tcpflags", cmd, f_tcpflags); + break; + + case O_TCPOPTS: + print_flags("tcpoptions", cmd, f_tcpopts); + break; + + case O_TCPWIN: + printf(" tcpwin %d", ntohs(cmd->arg1)); + break; + + case O_TCPACK: + printf(" tcpack %d", ntohl(cmd32->d[0])); + break; + + case O_TCPSEQ: + printf(" tcpseq %d", ntohl(cmd32->d[0])); + break; + + case O_UID: + { + struct passwd *pwd = getpwuid(cmd32->d[0]); + + if (pwd) + printf(" uid %s", pwd->pw_name); + else + printf(" uid %u", cmd32->d[0]); + } + break; + + case O_GID: + { + struct group *grp = getgrgid(cmd32->d[0]); + + if 
(grp) + printf(" gid %s", grp->gr_name); + else + printf(" gid %u", cmd32->d[0]); + } + break; + + case O_KEEP_STATE: + printf(" keep-state"); + break; + + case O_LIMIT: + { + struct _s_x *p = limit_masks; + ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; + u_int8_t x = c->limit_mask; + char *comma = " "; + + printf(" limit"); + for ( ; p->x != 0 ; p++) + if (x & p->x) { + x &= ~p->x; + printf("%s%s", comma, p->s); + comma = ","; + } + printf(" %d", c->conn_limit); + } + break; + + default: + printf(" [opcode %d len %d]", + cmd->opcode, cmd->len); + } + } + if (cmd->len & F_OR) { + printf(" or"); + or_block = 1; + } else if (or_block) { + printf(" }"); + or_block = 0; + } + } + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP|HAVE_DSTIP); + +#if 0 /* old stuff */ + switch (chain->fw_flg & IP_FW_F_COMMAND) { + case IP_FW_F_REJECT: + if (chain->fw_reject_code == IP_FW_REJECT_RST) + printf("reset"); + else { + printf("unreach "); + print_reject_code(chain->fw_reject_code); + } + break; + } + +do_options: + if (chain->fw_ipflg & IP_FW_IF_IPOPT) + print_flags("ipopt", chain->fw_ipopt, chain->fw_ipnopt, + f_ipopts); + + if (chain->fw_ipflg & IP_FW_IF_IPPRE) + printf(" ipprecedence %u", (chain->fw_iptos & 0xe0) >> 5); + + if (chain->fw_ipflg & IP_FW_IF_IPTOS) + print_flags("iptos", chain->fw_iptos, chain->fw_ipntos, + f_iptos); + + if (chain->fw_flg & IP_FW_F_ICMPBIT) { + int i, first = 1; + unsigned j; + + printf(" icmptype"); + + for (i = 0; i < IP_FW_ICMPTYPES_DIM; ++i) + for (j = 0; j < sizeof(unsigned) * 8; ++j) + if (chain->fw_uar.fw_icmptypes[i] & (1 << j)) { + printf("%c%d", first ? 
' ' : ',', + i * sizeof(unsigned) * 8 + j); + first = 0; + } + } +#endif /* XXX old stuff */ + printf("\n"); +} + +static void +show_dyn_ipfw(ipfw_dyn_rule *d) +{ + struct protoent *pe; + struct in_addr a; + + if (!do_expired) { + if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT)) + return; + } + + printf("%05d %10qu %10qu (%ds)", + (int)(d->rule), d->pcnt, d->bcnt, d->expire); + switch (d->dyn_type) { + case O_LIMIT_PARENT: + printf(" PARENT %d", d->count); + break; + case O_LIMIT: + printf(" LIMIT"); + break; + case O_KEEP_STATE: /* bidir, no mask */ + printf(" STATE"); + break; + } + + if ((pe = getprotobynumber(d->id.proto)) != NULL) + printf(" %s", pe->p_name); + else + printf(" proto %u", d->id.proto); + + a.s_addr = htonl(d->id.src_ip); + printf(" %s %d", inet_ntoa(a), d->id.src_port); + + a.s_addr = htonl(d->id.dst_ip); + printf(" <-> %s %d", inet_ntoa(a), d->id.dst_port); + printf("\n"); +} + +int +sort_q(const void *pa, const void *pb) +{ + int rev = (do_sort < 0); + int field = rev ? -do_sort : do_sort; + long long res = 0; + const struct dn_flow_queue *a = pa; + const struct dn_flow_queue *b = pb; + + switch (field) { + case 1: /* pkts */ + res = a->len - b->len; + break; + case 2: /* bytes */ + res = a->len_bytes - b->len_bytes; + break; + + case 3: /* tot pkts */ + res = a->tot_pkts - b->tot_pkts; + break; + + case 4: /* tot bytes */ + res = a->tot_bytes - b->tot_bytes; + break; + } + if (res < 0) + res = -1; + if (res > 0) + res = 1; + return (int)(rev ? res : -res); +} + +static void +list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) +{ + int l; + + printf(" mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", + fs->flow_mask.proto, + fs->flow_mask.src_ip, fs->flow_mask.src_port, + fs->flow_mask.dst_ip, fs->flow_mask.dst_port); + if (fs->rq_elements == 0) + return; + + printf("BKT Prot ___Source IP/port____ " + "____Dest. 
IP/port____ Tot_pkt/bytes Pkt/Byte Drp\n"); + if (do_sort != 0) + heapsort(q, fs->rq_elements, sizeof *q, sort_q); + for (l = 0; l < fs->rq_elements; l++) { + struct in_addr ina; + struct protoent *pe; + + ina.s_addr = htonl(q[l].id.src_ip); + printf("%3d ", q[l].hash_slot); + pe = getprotobynumber(q[l].id.proto); + if (pe) + printf("%-4s ", pe->p_name); + else + printf("%4u ", q[l].id.proto); + printf("%15s/%-5d ", + inet_ntoa(ina), q[l].id.src_port); + ina.s_addr = htonl(q[l].id.dst_ip); + printf("%15s/%-5d ", + inet_ntoa(ina), q[l].id.dst_port); + printf("%4qu %8qu %2u %4u %3u\n", + q[l].tot_pkts, q[l].tot_bytes, + q[l].len, q[l].len_bytes, q[l].drops); + if (verbose) + printf(" S %20qd F %20qd\n", + q[l].S, q[l].F); + } +} + +static void +print_flowset_parms(struct dn_flow_set *fs, char *prefix) +{ + int l; + char qs[30]; + char plr[30]; + char red[90]; /* Display RED parameters */ + + l = fs->qsize; + if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (l >= 8192) + sprintf(qs, "%d KB", l / 1024); + else + sprintf(qs, "%d B", l); + } else + sprintf(qs, "%3d sl.", l); + if (fs->plr) + sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); + else + plr[0] = '\0'; + if (fs->flags_fs & DN_IS_RED) /* RED parameters */ + sprintf(red, + "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", + (fs->flags_fs & DN_IS_GENTLE_RED) ? 
'G' : ' ', + 1.0 * fs->w_q / (double)(1 << SCALE_RED), + SCALE_VAL(fs->min_th), + SCALE_VAL(fs->max_th), + 1.0 * fs->max_p / (double)(1 << SCALE_RED)); + else + sprintf(red, "droptail"); + + printf("%s %s%s %d queues (%d buckets) %s\n", + prefix, qs, plr, fs->rq_elements, fs->rq_size, red); +} + +static void +list_pipes(void *data, int nbytes, int ac, char *av[]) +{ + u_long rulenum; + void *next = data; + struct dn_pipe *p = (struct dn_pipe *) data; + struct dn_flow_set *fs; + struct dn_flow_queue *q; + int l; + + if (ac > 0) + rulenum = strtoul(*av++, NULL, 10); + else + rulenum = 0; + for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { + double b = p->bandwidth; + char buf[30]; + char prefix[80]; + + if (p->next != (struct dn_pipe *)DN_IS_PIPE) + break; /* done with pipes, now queues */ + + /* + * compute length, as pipe have variable size + */ + l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); + next = (void *)p + l; + nbytes -= l; + + if (rulenum != 0 && rulenum != p->pipe_nr) + continue; + + /* + * Print rate (or clocking interface) + */ + if (p->if_name[0] != '\0') + sprintf(buf, "%s", p->if_name); + else if (b == 0) + sprintf(buf, "unlimited"); + else if (b >= 1000000) + sprintf(buf, "%7.3f Mbit/s", b/1000000); + else if (b >= 1000) + sprintf(buf, "%7.3f Kbit/s", b/1000); + else + sprintf(buf, "%7.3f bit/s ", b); + + sprintf(prefix, "%05d: %s %4d ms ", + p->pipe_nr, buf, p->delay); + print_flowset_parms(&(p->fs), prefix); + if (verbose) + printf(" V %20qd\n", p->V >> MY_M); + + q = (struct dn_flow_queue *)(p+1); + list_queues(&(p->fs), q); + } + for (fs = next; nbytes >= sizeof *fs; fs = next) { + char prefix[80]; + + if (fs->next != (struct dn_flow_set *)DN_IS_QUEUE) + break; + l = sizeof(*fs) + fs->rq_elements * sizeof(*q); + next = (void *)fs + l; + nbytes -= l; + q = (struct dn_flow_queue *)(fs+1); + sprintf(prefix, "q%05d: weight %d pipe %d ", + fs->fs_nr, fs->weight, fs->parent_nr); + print_flowset_parms(fs, prefix); + list_queues(fs, q); + } +} 
+ +static void +list(int ac, char *av[]) +{ + struct ip_fw *r; + ipfw_dyn_rule *dynrules, *d; + + void *lim, *data = NULL; + int n, nbytes, nstat, ndyn; + int exitval = EX_OK; + int lac; + char **lav; + u_long rnum; + char *endptr; + int seen = 0; + + const int ocmd = do_pipe ? IP_DUMMYNET_GET : IP_FW_GET; + int nalloc = 1024; /* start somewhere... */ + + ac--; + av++; + + /* get rules or pipes from kernel, resizing array as necessary */ + nbytes = nalloc; + + while (nbytes >= nalloc) { + nalloc = nalloc * 2 + 200; + nbytes = nalloc; + if ((data = realloc(data, nbytes)) == NULL) + err(EX_OSERR, "realloc"); + if (getsockopt(s, IPPROTO_IP, ocmd, data, &nbytes) < 0) + err(EX_OSERR, "getsockopt(IP_%s_GET)", + do_pipe ? "DUMMYNET" : "FW"); + } + + if (do_pipe) { + list_pipes(data, nbytes, ac, av); + goto done; + } + + /* + * Count static rules. They have variable size so we + * need to scan the list to count them. + */ + for (nstat = 1, r = data, lim = data + nbytes; + r->rulenum < 65535 && (void *)r < lim; + ++nstat, r = (void *)r + RULESIZE(r) ) + ; /* nothing */ + + /* + * Count dynamic rules. This is easier as they have + * fixed size. 
+ */ + r = (void *)r + RULESIZE(r); + dynrules = (ipfw_dyn_rule *)r ; + n = (void *)r - data; + ndyn = (nbytes - n) / sizeof *dynrules; + + /* if no rule numbers were specified, list all rules */ + if (ac == 0) { + for (n = 0, r = data; n < nstat; + n++, r = (void *)r + RULESIZE(r) ) + show_ipfw(r); + + if (do_dynamic && ndyn) { + printf("## Dynamic rules (%d):\n", ndyn); + for (n = 0, d = dynrules; n < ndyn; n++, d++) + show_dyn_ipfw(d); + } + goto done; + } + + /* display specific rules requested on command line */ + + for (lac = ac, lav = av; lac != 0; lac--) { + /* convert command line rule # */ + rnum = strtoul(*lav++, &endptr, 10); + if (*endptr) { + exitval = EX_USAGE; + warnx("invalid rule number: %s", *(lav - 1)); + continue; + } + for (n = seen = 0, r = data; n < nstat; + n++, r = (void *)r + RULESIZE(r) ) { + if (r->rulenum > rnum) + break; + if (r->rulenum == rnum) { + show_ipfw(r); + seen = 1; + } + } + if (!seen) { + /* give precedence to other error(s) */ + if (exitval == EX_OK) + exitval = EX_UNAVAILABLE; + warnx("rule %lu does not exist", rnum); + } + } + + if (do_dynamic && ndyn) { + printf("## Dynamic rules:\n"); + for (lac = ac, lav = av; lac != 0; lac--) { + rnum = strtoul(*lav++, &endptr, 10); + if (*endptr) + /* already warned */ + continue; + for (n = 0, d = dynrules; n < ndyn; n++, d++) { + if ((int)(d->rule) > rnum) + break; + if ((int)(d->rule) == rnum) + show_dyn_ipfw(d); + } + } + } + + ac = 0; + +done: + free(data); + + if (exitval != EX_OK) + exit(exitval); +} + +static void +show_usage(void) +{ + fprintf(stderr, "usage: ipfw [options]\n" +" add [number] rule\n" +" pipe number config [pipeconfig]\n" +" queue number config [queueconfig]\n" +" [pipe] flush\n" +" [pipe] delete number ...\n" +" [pipe] {list|show} [number ...]\n" +" {zero|resetlog} [number ...]\n" +"do \"ipfw -h\" or see ipfw manpage for details\n" +); + + exit(EX_USAGE); +} + +static void +help(void) +{ + + fprintf(stderr, "ipfw syntax summary:\n" +"ipfw add [N] [prob 
{0..1}] ACTION [log [logamount N]] ADDR OPTIONS\n" +"ipfw {pipe|queue} N config BODY\n" +"ipfw [pipe] {zero|delete|show} [N{,N}]\n" +"\n" +"RULE: [1..] [PROB] BODY\n" +"RULENUM: INTEGER(1..65534)\n" +"PROB: prob REAL(0..1)\n" +"BODY: check-state [LOG] (no body) |\n" +" ACTION [LOG] MATCH_ADDR [OPTION_LIST]\n" +"ACTION: check-state | allow | count | deny | reject | skipto N |\n" +" {divert|tee} PORT | forward ADDR | pipe N | queue N\n" +"ADDR: [ MAC dst src ether_type ] \n" +" [ from IPLIST [ PORT ] to IPLIST [ PORTLIST ] ]\n" +"IPLIST: IPADDR | ( IPADDR or ... or IPADDR )\n" +"IPADDR: [not] { any | me | ip | ip/bits | ip:mask | ip/bits{x,y,z} }\n" +"OPTION_LIST: OPTION [,OPTION_LIST]\n" +); +exit(0); +} + + +static int +lookup_host (char *host, struct in_addr *ipaddr) +{ + struct hostent *he; + + if (!inet_aton(host, ipaddr)) { + if ((he = gethostbyname(host)) == NULL) + return(-1); + *ipaddr = *(struct in_addr *)he->h_addr_list[0]; + } + return(0); +} + +/* + * fills the addr and mask fields in the instruction as appropriate from av. + * Update length as appropriate. + * The following formats are allowed: + * any matches any IP. Actually returns an empty instruction. 
+ * me returns O_IP_*_ME + * 1.2.3.4 single IP address + * 1.2.3.4:5.6.7.8 address:mask + * 1.2.3.4/24 address/mask + * 1.2.3.4/26{1,6,5,4,23} set of addresses in a subnet + */ +static void +fill_ip(ipfw_insn_ip *cmd, char *av) +{ + char *p = 0, md = 0; + u_int32_t i; + + cmd->o.len &= ~F_LEN_MASK; /* zero len */ + + if (!strncmp(av, "any", strlen(av))) + return; + + if (!strncmp(av, "me", strlen(av))) { + cmd->o.len |= F_INSN_SIZE(ipfw_insn); + return; + } + + p = strchr(av, '/'); + if (!p) + p = strchr(av, ':'); + if (p) { + md = *p; + *p++ = '\0'; + } + + if (lookup_host(av, &cmd->addr) != 0) + errx(EX_NOHOST, "hostname ``%s'' unknown", av); + switch (md) { + case ':': + if (!inet_aton(p, &cmd->mask)) + errx(EX_DATAERR, "bad netmask ``%s''", p); + break; + case '/': + i = atoi(p); + if (i == 0) + cmd->mask.s_addr = htonl(0); + else if (i > 32) + errx(EX_DATAERR, "bad width ``%s''", p); + else + cmd->mask.s_addr = htonl(~0 << (32 - i)); + break; + default: + cmd->mask.s_addr = htonl(~0); + break; + } + cmd->addr.s_addr &= cmd->mask.s_addr; + /* + * now look if we have a set of addresses. They are stored as follows: + * arg1 is the set size (powers of 2, 2..256) + * addr is the base address IN HOST FORMAT + * mask.. is an array of u_int32_t with bits set. 
+ */ + if (p) + p = strchr(p, '{'); + if (p) { /* fetch addresses */ + u_int32_t *d; + int low, high; + int i = contigmask((u_char *)&(cmd->mask), 32); + + if (i < 24 || i > 31) { + fprintf(stderr, "invalid set with mask %d\n", + i); + exit(0); + } + cmd->o.arg1 = 1<<(32-i); + cmd->addr.s_addr = ntohl(cmd->addr.s_addr); + d = (u_int32_t *)&cmd->mask; + cmd->o.opcode = O_IP_DST_SET; /* default */ + cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + (cmd->o.arg1+31)/32; + fprintf(stderr,"-- set size %d cmdlen %d\n", + cmd->o.arg1, cmd->o.len ); + for (i = 0; i < cmd->o.arg1/32 ; i++) + d[i] = 0; /* clear masks */ + + av = p+1; + low = cmd->addr.s_addr & 0xff; + high = low + cmd->o.arg1 - 1; + while (isdigit(*av)) { + char *s; + u_int16_t a = strtol(av, &s, 0); + + if (s == av) /* no parameter */ + break; + if (a < low || a > high) { + fprintf(stderr, "addr %d out of range [%d-%d]\n", + a, low, high); + exit(0); + } + a -= low; + d[ a/32] |= 1<<(a & 31); + if (*s != ',') + break; + av = s+1; + } + return; + } + + if (cmd->mask.s_addr == 0) { /* any */ + if (cmd->o.len & F_NOT) + errx(EX_DATAERR, "not any never matches"); + else /* useless, nuke it */ + return; + } else if (cmd->mask.s_addr == IP_MASK_ALL) /* one IP */ + cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); + else /* addr/mask */ + cmd->o.len |= F_INSN_SIZE(ipfw_insn_ip); +} + + +/* + * helper function to process a set of flags and set bits in the + * appropriate masks. 
+ */ +static void +fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode, + struct _s_x *flags, char *p) +{ + u_int8_t set=0, clear=0; + + while (p && *p) { + char *q; /* points to the separator */ + int val; + u_int8_t *which; /* mask we are working on */ + + if (*p == '!') { + p++; + which = &clear; + } else + which = &set; + q = strchr(p, ','); + if (q) + *q++ = '\0'; + val = match_token(flags, p); + if (val <= 0) + errx(EX_DATAERR, "invalid flag %s", p); + *which |= (u_int8_t)val; + p = q; + } + cmd->opcode = opcode; + cmd->len = (cmd->len & (F_NOT | F_OR)) | 1; + cmd->arg1 = (set & 0xff) | ( (clear & 0xff) << 8); +} + +#if 0 /* XXX todo */ +static void +fill_icmptypes(unsigned *types, char **vp, u_int *fw_flg) +{ + unsigned long icmptype; + char *c = *vp; + + while (*c) { + if (*c == ',') + ++c; + + icmptype = strtoul(c, &c, 0); + + if (*c != ',' && *c != '\0') + errx(EX_DATAERR, "invalid ICMP type"); + + if (icmptype >= IP_FW_ICMPTYPES_DIM * sizeof(unsigned) * 8) + errx(EX_DATAERR, "ICMP type out of range"); + + types[icmptype / (sizeof(unsigned) * 8)] |= + 1 << (icmptype % (sizeof(unsigned) * 8)); + *fw_flg |= IP_FW_F_ICMPBIT; + } +} +#endif /* XXX todo */ + +static void +delete(int ac, char *av[]) +{ + int rulenum; + struct dn_pipe pipe; + int i; + int exitval = EX_OK; + + memset(&pipe, 0, sizeof pipe); + + av++; ac--; + + /* Rule number */ + while (ac && isdigit(**av)) { + i = atoi(*av); av++; ac--; + if (do_pipe) { + if (do_pipe == 1) + pipe.pipe_nr = i; + else + pipe.fs.fs_nr = i; + i = setsockopt(s, IPPROTO_IP, IP_DUMMYNET_DEL, + &pipe, sizeof pipe); + if (i) { + exitval = 1; + warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", + do_pipe == 1 ? 
pipe.pipe_nr : + pipe.fs.fs_nr); + } + } else { + rulenum = i; + i = setsockopt(s, IPPROTO_IP, IP_FW_DEL, &rulenum, + sizeof rulenum); + if (i) { + exitval = EX_UNAVAILABLE; + warn("rule %u: setsockopt(IP_FW_DEL)", + rulenum); + } + } + } + if (exitval != EX_OK) + exit(exitval); +} + + +/* + * fill the interface structure. We do not check the name as we can + * create interfaces dynamically, so checking them at insert time + * makes relatively little sense. + * A '*' following the name means any unit. + */ +static void +fill_iface(ipfw_insn_if *cmd, char *arg) +{ + cmd->name[0] = '\0'; + cmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + /* Parse the interface or address */ + if (!strcmp(arg, "any")) + cmd->o.len = 0; /* effectively ignore this command */ + else if (!isdigit(*arg)) { + char *q; + + strncpy(cmd->name, arg, sizeof(cmd->name)); + cmd->name[sizeof(cmd->name) - 1] = '\0'; + /* find first digit or wildcard */ + for (q = cmd->name; *q && !isdigit(*q) && *q != '*'; q++) + continue; + cmd->p.unit = (*q == '*') ? -1 : atoi(q); + *q = '\0'; + } else if (!inet_aton(arg, &cmd->p.ip)) + errx(EX_DATAERR, "bad ip address ``%s''", arg); +} + +/* + * the following macro returns an error message if we run out of + * arguments. 
+ */ +#define NEED1(msg) {if (!ac) errx(EX_USAGE, msg);} + +static void +config_pipe(int ac, char **av) +{ + struct dn_pipe pipe; + int i; + char *end; + u_int32_t a; + void *par = NULL; + + memset(&pipe, 0, sizeof pipe); + + av++; ac--; + /* Pipe number */ + if (ac && isdigit(**av)) { + i = atoi(*av); av++; ac--; + if (do_pipe == 1) + pipe.pipe_nr = i; + else + pipe.fs.fs_nr = i; + } + while (ac > 1) { + double d; + int tok = match_token(dummynet_params, *av); + ac--; av++; + + switch(tok) { + case TOK_PLR: + NEED1("plr needs argument 0..1\n"); + d = strtod(av[0], NULL); + if (d > 1) + d = 1; + else if (d < 0) + d = 0; + pipe.fs.plr = (int)(d*0x7fffffff); + ac--; av++; + break; + + case TOK_QUEUE: + NEED1("queue needs queue size\n"); + end = NULL; + pipe.fs.qsize = strtoul(av[0], &end, 0); + if (*end == 'K' || *end == 'k') { + pipe.fs.flags_fs |= DN_QSIZE_IS_BYTES; + pipe.fs.qsize *= 1024; + } else if (*end == 'B' || !strncmp(end, "by", 2)) { + pipe.fs.flags_fs |= DN_QSIZE_IS_BYTES; + } + ac--; av++; + break; + + case TOK_BUCKETS: + NEED1("buckets needs argument\n"); + pipe.fs.rq_size = strtoul(av[0], NULL, 0); + ac--; av++; + break; + + case TOK_MASK: + NEED1("mask needs mask specifier\n"); + /* + * per-flow queue, mask is dst_ip, dst_port, + * src_ip, src_port, proto measured in bits + */ + par = NULL; + + pipe.fs.flow_mask.dst_ip = 0; + pipe.fs.flow_mask.src_ip = 0; + pipe.fs.flow_mask.dst_port = 0; + pipe.fs.flow_mask.src_port = 0; + pipe.fs.flow_mask.proto = 0; + end = NULL; + + while (ac >= 1) { + u_int32_t *p32 = NULL; + u_int16_t *p16 = NULL; + + tok = match_token(dummynet_params, *av); + ac--; av++; + switch(tok) { + case TOK_ALL: + /* + * special case, all bits significant + */ + pipe.fs.flow_mask.dst_ip = ~0; + pipe.fs.flow_mask.src_ip = ~0; + pipe.fs.flow_mask.dst_port = ~0; + pipe.fs.flow_mask.src_port = ~0; + pipe.fs.flow_mask.proto = ~0; + pipe.fs.flags_fs |= DN_HAVE_FLOW_MASK; + goto end_mask; + + case TOK_DSTIP: + p32 = &pipe.fs.flow_mask.dst_ip; 
+ break; + + case TOK_SRCIP: + p32 = &pipe.fs.flow_mask.src_ip; + break; + + case TOK_DSTPORT: + p16 = &pipe.fs.flow_mask.dst_port; + break; + + case TOK_SRCPORT: + p16 = &pipe.fs.flow_mask.src_port; + break; + + case TOK_PROTO: + break; + + default: + ac++; av--; /* backtrack */ + goto end_mask; + } + if (ac < 1) + errx(EX_USAGE, "mask: value missing"); + if (*av[0] == '/') { + a = strtoul(av[0]+1, &end, 0); + a = (a == 32) ? ~0 : (1 << a) - 1; + } else + a = strtoul(av[1], &end, 0); + if (p32 != NULL) + *p32 = a; + else if (p16 != NULL) { + if (a > 65535) + errx(EX_DATAERR, + "mask: must be 16 bit"); + *p16 = (u_int16_t)a; + } else { + if (a > 255) + errx(EX_DATAERR, + "mask: must be 8 bit"); + pipe.fs.flow_mask.proto = (u_int8_t)a; + } + if (a != 0) + pipe.fs.flags_fs |= DN_HAVE_FLOW_MASK; + ac--; av++; + } /* end while, config masks */ +end_mask: + break; + + case TOK_RED: + case TOK_GRED: + NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); + pipe.fs.flags_fs |= DN_IS_RED; + if (tok == TOK_GRED) + pipe.fs.flags_fs |= DN_IS_GENTLE_RED; + /* + * the format for parameters is w_q/min_th/max_th/max_p + */ + if ((end = strsep(&av[0], "/"))) { + double w_q = strtod(end, NULL); + if (w_q > 1 || w_q <= 0) + errx(EX_DATAERR, "0 < w_q <= 1"); + pipe.fs.w_q = (int) (w_q * (1 << SCALE_RED)); + } + if ((end = strsep(&av[0], "/"))) { + pipe.fs.min_th = strtoul(end, &end, 0); + if (*end == 'K' || *end == 'k') + pipe.fs.min_th *= 1024; + } + if ((end = strsep(&av[0], "/"))) { + pipe.fs.max_th = strtoul(end, &end, 0); + if (*end == 'K' || *end == 'k') + pipe.fs.max_th *= 1024; + } + if ((end = strsep(&av[0], "/"))) { + double max_p = strtod(end, NULL); + if (max_p > 1 || max_p <= 0) + errx(EX_DATAERR, "0 < max_p <= 1"); + pipe.fs.max_p = (int)(max_p * (1 << SCALE_RED)); + } + ac--; av++; + break; + + case TOK_DROPTAIL: + pipe.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); + break; + + case TOK_BW: + NEED1("bw needs bandwidth or interface\n"); + if (do_pipe != 1) + 
errx(EX_DATAERR, "bandwidth only valid for pipes"); + /* + * set clocking interface or bandwidth value + */ + if (av[0][0] >= 'a' && av[0][0] <= 'z') { + int l = sizeof(pipe.if_name)-1; + /* interface name */ + strncpy(pipe.if_name, av[0], l); + pipe.if_name[l] = '\0'; + pipe.bandwidth = 0; + } else { + pipe.if_name[0] = '\0'; + pipe.bandwidth = strtoul(av[0], &end, 0); + if (*end == 'K' || *end == 'k') { + end++; + pipe.bandwidth *= 1000; + } else if (*end == 'M') { + end++; + pipe.bandwidth *= 1000000; + } + if (*end == 'B' || !strncmp(end, "by", 2)) + pipe.bandwidth *= 8; + if (pipe.bandwidth < 0) + errx(EX_DATAERR, "bandwidth too large"); + } + ac--; av++; + break; + + case TOK_DELAY: + if (do_pipe != 1) + errx(EX_DATAERR, "delay only valid for pipes"); + NEED1("delay needs argument 0..10000ms\n"); + pipe.delay = strtoul(av[0], NULL, 0); + ac--; av++; + break; + + case TOK_WEIGHT: + if (do_pipe == 1) + errx(EX_DATAERR,"weight only valid for queues"); + NEED1("weight needs argument 0..100\n"); + pipe.fs.weight = strtoul(av[0], &end, 0); + ac--; av++; + break; + + case TOK_PIPE: + if (do_pipe == 1) + errx(EX_DATAERR,"pipe only valid for queues"); + NEED1("pipe needs pipe_number\n"); + pipe.fs.parent_nr = strtoul(av[0], &end, 0); + ac--; av++; + break; + + default: + errx(EX_DATAERR, "unrecognised option ``%s''", *av); + } + } + if (do_pipe == 1) { + if (pipe.pipe_nr == 0) + errx(EX_DATAERR, "pipe_nr must be > 0"); + if (pipe.delay > 10000) + errx(EX_DATAERR, "delay must be < 10000"); + } else { /* do_pipe == 2, queue */ + if (pipe.fs.parent_nr == 0) + errx(EX_DATAERR, "pipe must be > 0"); + if (pipe.fs.weight >100) + errx(EX_DATAERR, "weight must be <= 100"); + } + if (pipe.fs.flags_fs & DN_QSIZE_IS_BYTES) { + if (pipe.fs.qsize > 1024*1024) + errx(EX_DATAERR, "queue size must be < 1MB"); + } else { + if (pipe.fs.qsize > 100) + errx(EX_DATAERR, "2 <= queue size <= 100"); + } + if (pipe.fs.flags_fs & DN_IS_RED) { + size_t len; + int lookup_depth, avg_pkt_size; + 
double s, idle, weight, w_q; + struct clockinfo clock; + int t; + + if (pipe.fs.min_th >= pipe.fs.max_th) + errx(EX_DATAERR, "min_th %d must be < than max_th %d", + pipe.fs.min_th, pipe.fs.max_th); + if (pipe.fs.max_th == 0) + errx(EX_DATAERR, "max_th must be > 0"); + + len = sizeof(int); + if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", + &lookup_depth, &len, NULL, 0) == -1) + + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.dummynet.red_lookup_depth"); + if (lookup_depth == 0) + errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" + " must be greater than zero"); + + len = sizeof(int); + if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", + &avg_pkt_size, &len, NULL, 0) == -1) + + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.dummynet.red_avg_pkt_size"); + if (avg_pkt_size == 0) + errx(EX_DATAERR, + "net.inet.ip.dummynet.red_avg_pkt_size must" + " be greater than zero"); + + len = sizeof(struct clockinfo); + if (sysctlbyname("kern.clockrate", &clock, &len, NULL, 0) == -1) + errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); + + /* + * Ticks needed for sending a medium-sized packet. + * Unfortunately, when we are configuring a WF2Q+ queue, we + * do not have bandwidth information, because that is stored + * in the parent pipe, and also we have multiple queues + * competing for it. So we set s=0, which is not very + * correct. But on the other hand, why do we want RED with + * WF2Q+ ? + */ + if (pipe.bandwidth==0) /* this is a WF2Q+ queue */ + s = 0; + else + s = clock.hz * avg_pkt_size * 8 / pipe.bandwidth; + + /* + * max idle time (in ticks) before avg queue size becomes 0. + * NOTA: (3/w_q) is approx the value x so that + * (1-w_q)^x < 10^-3. + */ + w_q = ((double)pipe.fs.w_q) / (1 << SCALE_RED); + idle = s * 3. 
/*
 * Parse a MAC address specification into a 6-byte address and a 6-byte
 * mask.  Accepted forms:
 *	any			address and mask all zero
 *	xx:xx:xx:xx:xx:xx	exact match (mask all ones)
 *	xx:xx:.../N		first N bits significant
 *	xx:xx:...&mm:mm:...	explicit mask
 * On return addr has already been ANDed with mask.
 * A prefix length above 48 is treated as 48, so nothing is written
 * past the six mask bytes.
 */
static void
get_mac_addr_mask(char *p, u_char *addr, u_char *mask)
{
	int i, l;

	for (i=0; i<6; i++)
		addr[i] = mask[i] = 0;
	if (!strcmp(p, "any"))
		return;

	for (i=0; *p && i<6;i++, p++) {
		addr[i] = strtol(p, &p, 16);
		if (*p != ':') /* we start with the mask */
			break;
	}
	if (*p == '/') { /* mask len */
		l = strtol(p+1, &p, 0);
		/* never go beyond the 6 bytes of a MAC address */
		for (i=0; i<6 && l>0; l -=8, i++)
			mask[i] = (l >=8) ? 0xff : (u_char)(0xff << (8-l));
	} else if (*p == '&') { /* mask */
		for (i=0, p++; *p && i<6;i++, p++) {
			mask[i] = strtol(p, &p, 16);
			if (*p != ':')
				break;
		}
	} else if (*p == '\0') {
		for (i=0; i<6; i++)
			mask[i] = 0xff;
	}
	for (i=0; i<6; i++)
		addr[i] &= mask[i];
}
+ */ +static ipfw_insn * +add_mac(ipfw_insn *cmd, int ac, char *av[]) +{ + ipfw_insn_mac *mac; /* also *src */ + + if (ac <3) + errx(EX_DATAERR, "MAC dst src type"); + + cmd->opcode = O_MACADDR2; + cmd->len = (cmd->len & (F_NOT | F_OR)) | F_INSN_SIZE(ipfw_insn_mac); + + mac = (ipfw_insn_mac *)cmd; + get_mac_addr_mask(av[0], mac->addr, mac->mask); /* dst */ + get_mac_addr_mask(av[1], &(mac->addr[6]), &(mac->mask[6])); /* src */ + av += 2; + + if (strcmp(av[0], "any") != 0) { /* we have a non-null port */ + cmd += F_LEN(cmd); + + fill_newports((ipfw_insn_u16 *)cmd, av[0], IPPROTO_ETHERTYPE); + cmd->opcode = O_MAC_TYPE; + } + + return cmd; +} + +/* + * Parse arguments and assemble the microinstructions which make up a rule. + * Rules are added into the 'rulebuf' and then copied in the correct order + * into the actual rule. + * + * The syntax for a rule starts with the action, followed by an + * optional log action, and the various match patterns. + * In the assembled microcode, the first opcode must be a O_PROBE_STATE + * (generated if the rule includes a keep-state option), then the + * various match patterns, the "log" action, and the actual action. + * + */ +static void +add(int ac, char *av[]) +{ + /* + * rules are added into the 'rulebuf' and then copied in + * the correct order into the actual rule. + * Some things that need to go out of order (prob, action etc.) + * go into actbuf[]. + */ + static u_int32_t rulebuf[255], actbuf[255], cmdbuf[255]; + + ipfw_insn *src, *dst, *cmd, *action, *prev; + + struct ip_fw *rule; + + /* + * various flags used to record that we entered some fields. 
+ */ + int have_mac = 0; /* set if we have a MAC address */ + int have_state = 0; /* check-state or keep-state */ + + int i; + + int open_par = 0; /* open parenthesis ( */ + + /* proto is here because it is used to fetch ports */ + u_char proto = IPPROTO_IP; /* default protocol */ + + bzero(actbuf, sizeof(actbuf)); /* actions go here */ + bzero(cmdbuf, sizeof(cmdbuf)); + bzero(rulebuf, sizeof(rulebuf)); + + rule = (struct ip_fw *)rulebuf; + cmd = (ipfw_insn *)cmdbuf; + action = (ipfw_insn *)actbuf; + + av++; ac--; + + /* [rule N] -- Rule number optional */ + if (ac && isdigit(**av)) { + rule->rulenum = atoi(*av); + av++; + ac--; + } + + /* [prob D] -- match probability, optional */ + if (ac > 1 && !strncmp(*av, "prob", strlen(*av))) { + double d = strtod(av[1], NULL); + + if (d <= 0 || d > 1) + errx(EX_DATAERR, "illegal match prob. %s", av[1]); + if (d != 1) { /* 1 means always match */ + action->opcode = O_PROB; + action->len = 2; + *((int32_t *)(action+1)) = + (int32_t)((1 - d) * 0x7fffffff); + action += action->len; + } + av += 2; ac -= 2; + } + + /* action -- mandatory */ + NEED1("missing action"); + i = match_token(rule_actions, *av); + ac--; av++; + action->len = 1; /* default */ + switch(i) { + case TOK_CHECKSTATE: + have_state = 1; + action->opcode = O_CHECK_STATE; + break; + + case TOK_ACCEPT: + action->opcode = O_ACCEPT; + break; + + case TOK_DENY: + action->opcode = O_DENY; + break; + + case TOK_COUNT: + action->opcode = O_COUNT; + break; + + case TOK_QUEUE: + case TOK_PIPE: + action->len = F_INSN_SIZE(ipfw_insn_pipe); + case TOK_SKIPTO: + if (i == TOK_QUEUE) + action->opcode = O_QUEUE; + else if (i == TOK_PIPE) + action->opcode = O_PIPE; + else if (i == TOK_SKIPTO) + action->opcode = O_SKIPTO; + NEED1("missing skipto/pipe/queue number"); + action->arg1 = strtoul(*av, NULL, 10); + av++; ac--; + break; + + case TOK_DIVERT: + case TOK_TEE: + action->opcode = (i == TOK_DIVERT) ? 
O_DIVERT : O_TEE; + NEED1("missing divert/tee port"); + action->arg1 = strtoul(*av, NULL, 0); + if (action->arg1 == 0) { + struct servent *s; + setservent(1); + s = getservbyname(av[0], "divert"); + if (s != NULL) + action->arg1 = ntohs(s->s_port); + else + errx(EX_DATAERR, "illegal divert/tee port"); + } + ac--; av++; + break; + + case TOK_FORWARD: { + ipfw_insn_sa *p = (ipfw_insn_sa *)action; + char *s, *end; + + NEED1("missing forward address[:port]"); + + action->opcode = O_FORWARD_IP; + action->len = F_INSN_SIZE(ipfw_insn_sa); + + p->sa.sin_len = sizeof(struct sockaddr_in); + p->sa.sin_family = AF_INET; + p->sa.sin_port = 0; + /* + * locate the address-port separator (':' or ',') + */ + s = strchr(*av, ':'); + if (s == NULL) + s = strchr(*av, ','); + if (s != NULL) { + *(s++) = '\0'; + i = strtoport(s, &end, 0 /* base */, 0 /* proto */); + if (s == end) + errx(EX_DATAERR, + "illegal forwarding port ``%s''", s); + p->sa.sin_port = htons( (u_short)i ); + } + lookup_host(*av, &(p->sa.sin_addr)); + } + ac--; av++; + break; + + default: + errx(EX_DATAERR, "invalid action %s\n", *av); + } + action = next_cmd(action); + +#if 0 + } else if (!strncmp(*av, "reject", strlen(*av))) { + rule.fw_flg |= IP_FW_F_REJECT; av++; ac--; + rule.fw_reject_code = ICMP_UNREACH_HOST; + } else if (!strncmp(*av, "reset", strlen(*av))) { + rule.fw_flg |= IP_FW_F_REJECT; av++; ac--; + rule.fw_reject_code = ICMP_REJECT_RST; /* check TCP later */ + } else if (!strncmp(*av, "unreach", strlen(*av))) { + rule.fw_flg |= IP_FW_F_REJECT; av++; ac--; + fill_reject_code(&rule.fw_reject_code, *av); av++; ac--; + } +#endif /* XXX other actions */ + + /* + * [log [logamount N]] -- log, optional + * + * If exists, it goes first in the cmdbuf, but then it is + * skipped in the copy section to the end of the buffer. 
+ */ + if (ac && !strncmp(*av, "log", strlen(*av))) { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + + cmd->len = F_INSN_SIZE(ipfw_insn_log); + cmd->opcode = O_LOG; + av++; ac--; + if (ac && !strncmp(*av, "logamount", strlen(*av))) { + ac--; av++; + NEED1("logamount requires argument"); + c->max_log = atoi(*av); + if (c->max_log < 0) + errx(EX_DATAERR, "logamount must be positive"); + ac--; av++; + } + cmd = next_cmd(cmd); + } + + if (have_state) { + have_state = 0; + goto done; + } + +#define OR_START(target) \ + if (ac && (*av[0] == '(' || *av[0] == '{')) { \ + if (open_par) \ + errx(EX_USAGE, "nested \"(\" not allowed\n"); \ + open_par = 1; \ + if ( (av[0])[1] == '\0') { \ + ac--; av++; \ + } else \ + (*av)++; \ + } \ + target: \ + + +#define CLOSE_PAR \ + if (open_par) { \ + if (ac && ( \ + !strncmp(*av, ")", strlen(*av)) || \ + !strncmp(*av, "}", strlen(*av)) )) { \ + open_par = 0; \ + ac--; av++; \ + } else \ + errx(EX_USAGE, "missing \")\"\n"); \ + } + +#define NOT_BLOCK \ + if (ac && !strncmp(*av, "not", strlen(*av))) { \ + if (cmd->len & F_NOT) \ + errx(EX_USAGE, "double \"not\" not allowed\n"); \ + cmd->len |= F_NOT; \ + ac--; av++; \ + } + +#define OR_BLOCK(target) \ + if (ac && !strncmp(*av, "or", strlen(*av))) { \ + if (prev == NULL || open_par == 0) \ + errx(EX_DATAERR, "invalid OR block"); \ + prev->len |= F_OR; \ + ac--; av++; \ + goto target; \ + } \ + CLOSE_PAR; + + /* + * protocol, mandatory + */ + OR_START(get_proto); + NOT_BLOCK; + NEED1("missing protocol"); + { + struct protoent *pe; + + if (!strncmp(*av, "all", strlen(*av))) + ; /* same as "ip" */ + else if (!strncmp(*av, "MAC", strlen(*av))) { + /* need exactly 3 fields */ + cmd = add_mac(cmd, ac-1, av+1); /* exits in case of errors */ + ac -= 3; + av += 3; + have_mac = 1; + } else if ((proto = atoi(*av)) > 0) + ; /* all done! 
*/ + else if ((pe = getprotobyname(*av)) != NULL) + proto = pe->p_proto; + else + errx(EX_DATAERR, "invalid protocol ``%s''", *av); + av++; ac--; + if (proto != IPPROTO_IP) + fill_cmd(cmd, O_PROTO, 0, proto); + } + cmd = next_cmd(cmd); + OR_BLOCK(get_proto); + + /* + * "from", mandatory (unless we have a MAC address) + */ + if (!ac || strncmp(*av, "from", strlen(*av))) { + if (have_mac) /* we do not need a "to" address */ + goto read_to; + errx(EX_USAGE, "missing ``from''"); + } + ac--; av++; + + /* + * source IP, mandatory + */ + OR_START(source_ip); + NOT_BLOCK; /* optional "not" */ + NEED1("missing source address"); + + /* source -- mandatory */ + fill_ip((ipfw_insn_ip *)cmd, *av); + if (cmd->opcode == O_IP_DST_SET) /* set */ + cmd->opcode = O_IP_SRC_SET; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ + cmd->opcode = O_IP_SRC_ME; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ + cmd->opcode = O_IP_SRC; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_ip)) /* addr/mask */ + cmd->opcode = O_IP_SRC_MASK; + /* otherwise len will be zero and the command skipped */ + ac--; av++; + prev = cmd; /* in case we need to backtrack */ + cmd = next_cmd(cmd); + OR_BLOCK(source_ip); + + /* + * source ports, optional + */ + NOT_BLOCK; /* optional "not" */ + if (ac && fill_newports((ipfw_insn_u16 *)cmd, *av, proto)) { + /* XXX todo: check that we have a protocol with ports */ + cmd->opcode = O_IP_SRCPORT; + ac--; + av++; + cmd = next_cmd(cmd); + } + +read_to: + /* + * "to", mandatory (unless we have a MAC address + */ + if (!ac || strncmp(*av, "to", strlen(*av))) { + if (have_mac) + goto read_options; + errx(EX_USAGE, "missing ``to''"); + } + av++; ac--; + + /* + * destination, mandatory + */ + OR_START(dest_ip); + NOT_BLOCK; /* optional "not" */ + NEED1("missing dst address"); + fill_ip((ipfw_insn_ip *)cmd, *av); + if (cmd->opcode == O_IP_DST_SET) /* set */ + ; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ + cmd->opcode = O_IP_DST_ME; + else 
if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ + cmd->opcode = O_IP_DST; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_ip)) /* addr/mask */ + cmd->opcode = O_IP_DST_MASK; + ac--; + av++; + prev = cmd; + cmd = next_cmd(cmd); + OR_BLOCK(dest_ip); + + /* + * dest. ports, optional + */ + NOT_BLOCK; /* optional "not" */ + if (ac && fill_newports((ipfw_insn_u16 *)cmd, *av, proto)) { + /* XXX todo: check that we have a protocol with ports */ + cmd->opcode = O_IP_DSTPORT; + ac--; + av++; + cmd += F_LEN(cmd); + } + +read_options: + prev = NULL; + while (ac) { + char *s = *av; + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias */ + + if (*s == '!') { /* alternate syntax for NOT */ + if (cmd->len & F_NOT) + errx(EX_USAGE, "double \"not\" not allowed\n"); + cmd->len = F_NOT; + s++; + } + i = match_token(rule_options, s); + ac--; av++; + switch(i) { + case TOK_NOT: + if (cmd->len & F_NOT) + errx(EX_USAGE, "double \"not\" not allowed\n"); + cmd->len = F_NOT; + break; + + case TOK_OR: + if (prev == NULL) + errx(EX_USAGE, "invalid \"or\" block\n"); + prev->len |= F_OR; + break; + + case TOK_IN: + fill_cmd(cmd, O_IN, 0, 0); + break; + + case TOK_OUT: + cmd->len ^= F_NOT; /* toggle F_NOT */ + fill_cmd(cmd, O_IN, 0, 0); + break; + + case TOK_FRAG: + fill_cmd(cmd, O_FRAG, 0, 0); + break; + + case TOK_LAYER2: + fill_cmd(cmd, O_LAYER2, 0, 0); + break; + + case TOK_XMIT: + case TOK_RECV: + case TOK_VIA: + NEED1("recv, xmit, via require interface name" + " or address"); + fill_iface((ipfw_insn_if *)cmd, av[0]); + ac--; av++; + if (F_LEN(cmd) == 0) /* not a valid address */ + break; + if (i == TOK_XMIT) + cmd->opcode = O_XMIT; + else if (i == TOK_RECV) + cmd->opcode = O_RECV; + else if (i == TOK_VIA) + cmd->opcode = O_VIA; + break; + + case TOK_IPTTL: + NEED1("ipttl requires TTL"); + fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPID: + NEED1("ipid requires length"); + fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0)); + ac--; av++; + 
break; + + case TOK_IPLEN: + NEED1("iplen requires length"); + fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPVER: + NEED1("ipver requires version"); + fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPOPTS: + NEED1("missing argument for ipoptions"); + fill_flags(cmd, O_IPOPTS, f_ipopts, *av); + ac--; av++; + break; + + case TOK_UID: + NEED1("uid requires argument"); + { + char *end; + uid_t uid; + struct passwd *pwd; + + cmd->opcode = O_UID; + uid = strtoul(*av, &end, 0); + pwd = (*end == '\0') ? getpwuid(uid) : getpwnam(*av); + if (pwd == NULL) + errx(EX_DATAERR, "uid \"%s\" nonexistent", *av); + cmd32->d[0] = uid; + cmd->len = F_INSN_SIZE(ipfw_insn_u32); + ac--; av++; + } + break; + + case TOK_GID: + NEED1("gid requires argument"); + { + char *end; + gid_t gid; + struct group *grp; + + cmd->opcode = O_GID; + gid = strtoul(*av, &end, 0); + grp = (*end == '\0') ? getgrgid(gid) : getgrnam(*av); + if (grp == NULL) + errx(EX_DATAERR, "gid \"%s\" nonexistent", *av); + + cmd32->d[0] = gid; + cmd->len = F_INSN_SIZE(ipfw_insn_u32); + ac--; av++; + } + break; + + case TOK_ESTAB: + fill_cmd(cmd, O_ESTAB, 0, 0); + break; + + case TOK_SETUP: + fill_cmd(cmd, O_TCPFLAGS, 0, + (TH_SYN) | ( (TH_ACK) & 0xff) <<8 ); + break; + + case TOK_TCPOPTS: + NEED1("missing argument for tcpoptions"); + fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); + ac--; av++; + break; + + case TOK_TCPSEQ: + case TOK_TCPACK: + NEED1("tcpseq/tcpack requires argument"); + cmd->len = F_INSN_SIZE(ipfw_insn_u32); + cmd->opcode = (i == TOK_TCPSEQ) ? 
O_TCPSEQ : O_TCPACK; + cmd32->d[0] = htonl(strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_TCPWIN: + NEED1("tcpwin requires length"); + fill_cmd(cmd, O_TCPWIN, 0, + htons(strtoul(*av, NULL, 0))); + ac--; av++; + break; + + case TOK_TCPFLAGS: + NEED1("missing argument for tcpflags"); + cmd->opcode = O_TCPFLAGS; + fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); + ac--; av++; + break; + + case TOK_KEEPSTATE: + have_state = 1; + fill_cmd(cmd, O_KEEP_STATE, 0, 0); + break; + + case TOK_LIMIT: + NEED1("limit needs mask and # of connections"); + { + ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; + + cmd->len = F_INSN_SIZE(ipfw_insn_limit); + cmd->opcode = O_LIMIT; + c->limit_mask = 0; + c->conn_limit = 0; + for (; ac >1 ;) { + int val; + + val = match_token(limit_masks, *av); + if (val <= 0) + break; + c->limit_mask |= val; + ac--; av++; + } + c->conn_limit = atoi(*av); + if (c->conn_limit == 0) + errx(EX_USAGE, "limit: limit must be >0"); + if (c->limit_mask == 0) + errx(EX_USAGE, "missing limit mask"); + ac--; av++; + have_state = 1; + } + break; + + default: + errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); + } + if (F_LEN(cmd) > 0) { /* prepare to advance */ + prev = cmd; + cmd = next_cmd(cmd); + } + } +#if 0 /* XXX todo */ +do_options: + while (ac) { + } else if (!strncmp(*av, "ipprecedence", strlen(*av))) { + u_long ippre; + char *c; + + av++; ac--; + NEED1("missing argument for ``ipprecedence''"); + ippre = strtoul(*av, &c, 0); + if (*c != '\0') + errx(EX_DATAERR, "argument to ipprecedence" + " must be numeric"); + if (ippre > 7) + errx(EX_DATAERR, "argument to ipprecedence" + " out of range"); + rule.fw_ipflg |= IP_FW_IF_IPPRE; + rule.fw_iptos |= (u_short)(ippre << 5); + av++; ac--; + } else if (!strncmp(*av, "iptos", strlen(*av))) { + av++; ac--; + NEED1("missing argument for ``iptos''"); + rule.fw_ipflg |= IP_FW_IF_IPTOS; + fill_flags(&rule.fw_iptos, &rule.fw_ipntos, + f_iptos, av); + av++; ac--; + } else if (rule.fw_prot == IPPROTO_ICMP) { + if 
(!strncmp(*av, "icmptypes", strlen(*av))) { + av++; ac--; + NEED1("missing argument for ``icmptypes''"); + fill_icmptypes(rule.fw_uar.fw_icmptypes, + av, &rule.fw_flg); + av++; ac--; + } + } + } +#endif /* XXX todo */ +done: + /* + * Now copy stuff into the rule. + * If we have a keep-state option, the first instruction + * must be a PROBE_STATE (which is generated here). + * If we have a LOG option, it was stored as the first command, + * and now must be moved to the top of the action part. + */ + dst = (ipfw_insn *)rule->cmd; + + /* + * generate O_PROBE_STATE if necessary + */ + if (have_state) { + fill_cmd(dst, O_PROBE_STATE, 0, 0); + dst = next_cmd(dst); + } + /* + * copy all commands but O_LOG + */ + for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) { + i = F_LEN(src); + + if (src->opcode != O_LOG) { + bcopy(src, dst, i * sizeof(u_int32_t)); + dst += i; + } + } + + /* + * start action section + */ + rule->act_ofs = dst - rule->cmd; + + /* + * put back O_LOG if necessary + */ + src = (ipfw_insn *)cmdbuf; + if ( src->opcode == O_LOG ) { + i = F_LEN(src); + bcopy(src, dst, i * sizeof(u_int32_t)); + dst += i; + } + /* + * copy all other actions + */ + for (src = (ipfw_insn *)actbuf; src != action; src += i) { + i = F_LEN(src); + bcopy(src, dst, i * sizeof(u_int32_t)); + dst += i; + } + + rule->cmd_len = (u_int32_t *)dst - (u_int32_t *)(rule->cmd); + i = (void *)dst - (void *)rule; + if (!do_quiet) + show_ipfw(rule); + if (getsockopt(s, IPPROTO_IP, IP_FW_ADD, rule, &i) == -1) + err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_ADD"); + if (!do_quiet) + show_ipfw(rule); +} + +static void +zero (int ac, char *av[]) +{ + int rulenum; + int failed = EX_OK; + + av++; ac--; + + if (!ac) { + /* clear all entries */ + if (setsockopt(s, IPPROTO_IP, IP_FW_ZERO, NULL, 0) < 0) + err(EX_UNAVAILABLE, "setsockopt(%s)", "IP_FW_ZERO"); + if (!do_quiet) + printf("Accounting cleared.\n"); + + return; + } + + while (ac) { + /* Rule number */ + if (isdigit(**av)) { + rulenum = 
atoi(*av); + av++; + ac--; + if (setsockopt(s, IPPROTO_IP, + IP_FW_ZERO, &rulenum, sizeof rulenum)) { + warn("rule %u: setsockopt(IP_FW_ZERO)", + rulenum); + failed = EX_UNAVAILABLE; + } else if (!do_quiet) + printf("Entry %d cleared\n", rulenum); + } else { + errx(EX_USAGE, "invalid rule number ``%s''", *av); + } + } + if (failed != EX_OK) + exit(failed); +} + +static void +resetlog (int ac, char *av[]) +{ + int rulenum; + int failed = EX_OK; + + av++; ac--; + + if (!ac) { + /* clear all entries */ + if (setsockopt(s, IPPROTO_IP, IP_FW_RESETLOG, NULL, 0) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_RESETLOG)"); + if (!do_quiet) + printf("Logging counts reset.\n"); + + return; + } + + while (ac) { + /* Rule number */ + if (isdigit(**av)) { + rulenum = atoi(*av); + av++; + ac--; + if (setsockopt(s, IPPROTO_IP, + IP_FW_RESETLOG, &rulenum, sizeof rulenum)) { + warn("rule %u: setsockopt(IP_FW_RESETLOG)", + rulenum); + failed = EX_UNAVAILABLE; + } else if (!do_quiet) + printf("Entry %d logging count reset\n", + rulenum); + } else { + errx(EX_DATAERR, "invalid rule number ``%s''", *av); + } + } + if (failed != EX_OK) + exit(failed); +} + +static void +flush() +{ + int cmd = do_pipe ? IP_DUMMYNET_FLUSH : IP_FW_FLUSH; + + if (!do_force && !do_quiet) { /* need to ask user */ + int c; + + printf("Are you sure? [yn] "); + fflush(stdout); + do { + c = toupper(getc(stdin)); + while (c != '\n' && getc(stdin) != '\n') + if (feof(stdin)) + return; /* and do not flush */ + } while (c != 'Y' && c != 'N'); + printf("\n"); + if (c == 'N') /* user said no */ + return; + } + if (setsockopt(s, IPPROTO_IP, cmd, NULL, 0) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_%s_FLUSH)", + do_pipe ? "DUMMYNET" : "FW"); + if (!do_quiet) + printf("Flushed all %s.\n", do_pipe ? 
"pipes" : "rules"); +} + +static int +ipfw_main(int ac, char **av) +{ + int ch; + + if (ac == 1) + show_usage(); + + /* Set the force flag for non-interactive processes */ + do_force = !isatty(STDIN_FILENO); + + optind = optreset = 1; + while ((ch = getopt(ac, av, "hs:adefNqtv")) != -1) + switch (ch) { + case 'h': /* help */ + help(); + break; /* NOTREACHED */ + + case 's': /* sort */ + do_sort = atoi(optarg); + break; + case 'a': + do_acct = 1; + break; + case 'd': + do_dynamic = 1; + break; + case 'e': + do_expired = 1; + break; + case 'f': + do_force = 1; + break; + case 'N': + do_resolv = 1; + break; + case 'q': + do_quiet = 1; + break; + case 't': + do_time = 1; + break; + case 'v': /* verbose */ + verbose++; + break; + default: + show_usage(); + } + + ac -= optind; + av += optind; + NEED1("bad arguments, for usage summary ``ipfw''"); + + /* + * optional: pipe or queue + */ + if (!strncmp(*av, "pipe", strlen(*av))) { + do_pipe = 1; + ac--; + av++; + } else if (!strncmp(*av, "queue", strlen(*av))) { + do_pipe = 2; + ac--; + av++; + } + NEED1("missing command"); + + /* + * for pipes and queues we normally say 'pipe NN config' + * but the code is easier to parse as 'pipe config NN' + * so we swap the two arguments. 
+ */ + if (do_pipe > 0 && ac > 1 && *av[0] >= '0' && *av[0] <= '9') { + char *p = av[0]; + av[0] = av[1]; + av[1] = p; + } + if (!strncmp(*av, "add", strlen(*av))) + add(ac, av); + else if (do_pipe && !strncmp(*av, "config", strlen(*av))) + config_pipe(ac, av); + else if (!strncmp(*av, "delete", strlen(*av))) + delete(ac, av); + else if (!strncmp(*av, "flush", strlen(*av))) + flush(); + else if (!strncmp(*av, "zero", strlen(*av))) + zero(ac, av); + else if (!strncmp(*av, "resetlog", strlen(*av))) + resetlog(ac, av); + else if (!strncmp(*av, "print", strlen(*av)) || + !strncmp(*av, "list", strlen(*av))) + list(ac, av); + else if (!strncmp(*av, "show", strlen(*av))) { + do_acct++; + list(ac, av); + } else + errx(EX_USAGE, "bad command `%s'", *av); + return 0; +} + + +/* + * Run the ipfw commands contained in a file, one command per line. + * + * Options (parsed with getopt): -p cmd pipes the file through the + * preprocessor "cmd"; -D arg and -U arg are appended to the + * preprocessor's argument list and are rejected unless -p came first; + * -q makes every command run with "-q". Exactly one file name must + * follow the options. + * + * For each input line, text from the first '#' on is dropped, the rest + * is split on whitespace and handed to ipfw_main() with args[0] set to + * "Line N"; lines that yield no words are skipped. When a preprocessor + * was started, its exit status or terminating signal is checked at the + * end and reported with errx() if abnormal. + */ +static void +ipfw_readfile(int ac, char *av[]) +{ +#define MAX_ARGS 32 +#define WHITESP " \t\f\v\n\r" + char buf[BUFSIZ]; + char *a, *p, *args[MAX_ARGS], *cmd = NULL; + /* + * Must hold "Line ", any int and the NUL; a 10-byte buffer + * overflowed from line 10000 onward. + */ + char linename[32]; + int i=0, lineno=0, qflag=0, pflag=0, status; + FILE *f = NULL; + pid_t preproc = 0; + int c; + + while ((c = getopt(ac, av, "D:U:p:q")) != -1) + switch(c) { + case 'D': + if (!pflag) + errx(EX_USAGE, "-D requires -p"); + if (i > MAX_ARGS - 2) + errx(EX_USAGE, + "too many -D or -U options"); + args[i++] = "-D"; + args[i++] = optarg; + break; + + case 'U': + if (!pflag) + errx(EX_USAGE, "-U requires -p"); + if (i > MAX_ARGS - 2) + errx(EX_USAGE, + "too many -D or -U options"); + args[i++] = "-U"; + args[i++] = optarg; + break; + + case 'p': + pflag = 1; + cmd = optarg; + args[0] = cmd; + i = 1; + break; + + case 'q': + qflag = 1; + break; + + default: + errx(EX_USAGE, "bad arguments, for usage" + " summary ``ipfw''"); + } + + av += optind; + ac -= optind; + if (ac != 1) + errx(EX_USAGE, "extraneous filename arguments"); + + if ((f = fopen(av[0], "r")) == NULL) + err(EX_UNAVAILABLE, "fopen: %s", av[0]); + + if (pflag) { + /* pipe through preprocessor (cpp or m4) */ + int pipedes[2]; + + args[i] = 0; + + if (pipe(pipedes) 
== -1) + err(EX_OSERR, "cannot create pipe"); + + switch((preproc = fork())) { + case -1: + err(EX_OSERR, "cannot fork"); + + case 0: + /* child */ + if (dup2(fileno(f), 0) == -1 + || dup2(pipedes[1], 1) == -1) + err(EX_OSERR, "dup2()"); + fclose(f); + close(pipedes[1]); + close(pipedes[0]); + execvp(cmd, args); + err(EX_OSERR, "execvp(%s) failed", cmd); + + default: + /* parent */ + fclose(f); + close(pipedes[1]); + if ((f = fdopen(pipedes[0], "r")) == NULL) { + int savederrno = errno; + + (void)kill(preproc, SIGTERM); + errno = savederrno; + err(EX_OSERR, "fdopen()"); + } + } + } + + while (fgets(buf, BUFSIZ, f)) { + lineno++; + /* bounded write: never runs past linename[] */ + snprintf(linename, sizeof(linename), "Line %d", lineno); + args[0] = linename; + + if (*buf == '#') + continue; + if ((p = strchr(buf, '#')) != NULL) + *p = '\0'; + i = 1; + if (qflag) + args[i++] = "-q"; + for (a = strtok(buf, WHITESP); + a && i < MAX_ARGS; a = strtok(NULL, WHITESP), i++) + args[i] = a; + if (i == (qflag? 2: 1)) + continue; + if (i == MAX_ARGS) + errx(EX_USAGE, "%s: too many arguments", + linename); + args[i] = NULL; + + ipfw_main(i, args); + } + fclose(f); + if (pflag) { + /* err(), not errx(): keep the errno set by waitpid() */ + if (waitpid(preproc, &status, 0) == -1) + err(EX_OSERR, "waitpid()"); + if (WIFEXITED(status) && WEXITSTATUS(status) != EX_OK) + errx(EX_UNAVAILABLE, + "preprocessor exited with status %d", + WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + errx(EX_UNAVAILABLE, + "preprocessor exited with signal %d", + WTERMSIG(status)); + } +} + +int +main(int ac, char *av[]) +{ + s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (s < 0) + err(EX_UNAVAILABLE, "socket"); + + setbuf(stdout, 0); + + /* + * If the last argument is an absolute pathname, interpret it + * as a file to be preprocessed. 
+ */ + + if (ac > 1 && av[ac - 1][0] == '/' && access(av[ac - 1], R_OK) == 0) + ipfw_readfile(ac, av); + else + ipfw_main(ac, av); + return EX_OK; +} diff --git a/sys/conf/files b/sys/conf/files index 413da1dc4492..1cff41f5ca6f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1164,7 +1164,7 @@ netinet/ip_ecn.c optional inet6 netinet/ip_encap.c optional inet netinet/ip_encap.c optional inet6 netinet/ip_flow.c optional inet -netinet/ip_fw.c optional ipfirewall +netinet/ip_fw2.c optional ipfirewall netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet netinet/ip_mroute.c optional inet diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c index 6006b650df5e..7f8b241aa15d 100644 --- a/sys/netinet/ip_dummynet.c +++ b/sys/netinet/ip_dummynet.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2001 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * @@ -61,7 +61,6 @@ #include #include #include -#include /* XXX */ #include #include #include @@ -166,12 +165,6 @@ static void dn_rule_delete(void *); int if_tx_rdy(struct ifnet *ifp); -/* - * ip_fw_chain_head is used when deleting a pipe, because ipfw rules can - * hold references to the pipe. 
- */ -extern LIST_HEAD (ip_fw_head, ip_fw) ip_fw_chain_head; - static void rt_unref(struct rtentry *rt) { @@ -1023,9 +1016,13 @@ static __inline struct dn_flow_set * locate_flowset(int pipe_nr, struct ip_fw *rule) { - struct dn_flow_set *fs = NULL ; + ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)(rule->cmd + rule->act_ofs); + struct dn_flow_set *fs = (struct dn_flow_set *)(cmd->pipe_ptr); - if ( (rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE ) + if (fs != NULL) + return fs; + + if ( cmd->o.opcode == O_QUEUE ) for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) ; else { @@ -1035,8 +1032,7 @@ locate_flowset(int pipe_nr, struct ip_fw *rule) if (p1 != NULL) fs = &(p1->fs) ; } - if (fs != NULL) - rule->pipe_ptr = fs ; /* record for the future */ + (struct dn_flow_set *)(cmd->pipe_ptr) = fs; /* record for the future */ return fs ; } @@ -1065,16 +1061,18 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) u_int64_t len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; int s ; + int action = fwa->rule->cmd[fwa->rule->act_ofs].opcode; s = splimp(); pipe_nr &= 0xffff ; - if ( (fs = fwa->rule->pipe_ptr) == NULL ) { - fs = locate_flowset(pipe_nr, fwa->rule); - if (fs == NULL) - goto dropit ; /* this queue/pipe does not exist! */ - } + /* + * this is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule + */ + fs = locate_flowset(pipe_nr, fwa->rule); + if (fs == NULL) + goto dropit ; /* this queue/pipe does not exist! */ pipe = fs->pipe ; if (pipe == NULL) { /* must be a queue, try find a matching pipe */ for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr; @@ -1152,7 +1150,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) * to schedule it. This involves different actions for fixed-rate or * WF2Q queues. */ - if ( (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE ) { + if ( action == O_PIPE ) { /* * Fixed-rate queue: just insert into the ready_heap. 
*/ @@ -1302,15 +1300,13 @@ static void dummynet_flush() { struct dn_pipe *curr_p, *p ; - struct ip_fw *rule ; struct dn_flow_set *fs, *curr_fs; int s ; s = splimp() ; /* remove all references to pipes ...*/ - LIST_FOREACH(rule, &ip_fw_chain_head, next) - rule->pipe_ptr = NULL ; + flush_pipe_ptrs(NULL); /* prevent future matches... */ p = all_pipes ; all_pipes = NULL ; @@ -1375,8 +1371,8 @@ dn_rule_delete(void *r) fs = &(p->fs) ; dn_rule_delete_fs(fs, r); for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) ) - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule ; + if (pkt->hdr.mh_data == r) + pkt->hdr.mh_data = (void *)ip_fw_default_rule ; } } @@ -1663,7 +1659,6 @@ static int delete_pipe(struct dn_pipe *p) { int s ; - struct ip_fw *rule ; if (p->pipe_nr == 0 && p->fs.fs_nr == 0) return EINVAL ; @@ -1687,9 +1682,7 @@ delete_pipe(struct dn_pipe *p) else a->next = b->next ; /* remove references to this pipe from the ip_fw rules. */ - LIST_FOREACH(rule, &ip_fw_chain_head, next) - if (rule->pipe_ptr == &(b->fs)) - rule->pipe_ptr = NULL ; + flush_pipe_ptrs(&(b->fs)); /* remove all references to this pipe from flow_sets */ for (fs = all_flow_sets; fs; fs= fs->next ) @@ -1721,9 +1714,7 @@ delete_pipe(struct dn_pipe *p) else a->next = b->next ; /* remove references to this flow_set from the ip_fw rules. */ - LIST_FOREACH(rule, &ip_fw_chain_head, next) - if (rule->pipe_ptr == b) - rule->pipe_ptr = NULL ; + flush_pipe_ptrs(b); if (b->pipe != NULL) { /* Update total weight on parent pipe and cleanup parent heaps */ @@ -1847,9 +1838,14 @@ ip_dn_ctl(struct sockopt *sopt) /* Disallow sets in really-really secure mode. 
*/ if (sopt->sopt_dir == SOPT_SET) { +#if __FreeBSD_version >= 500034 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error) return (error); +#else + if (securelevel >= 3) + return (EPERM); +#endif } switch (sopt->sopt_name) { diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index dcb3bcf783fe..508cc767b43b 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -1,183 +1,305 @@ /* - * Copyright (c) 1993 Daniel Boulet - * Copyright (c) 1994 Ugen J.S.Antsilevich + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * - * Redistribution and use in source forms, with and without modification, - * are permitted provided that this entire comment appears intact. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Redistribution in binary form may occur without any restrictions. - * Obviously, it would be nice if you gave credit where credit is due - * but requiring it would be too onerous. - * - * This software is provided ``AS IS'' without any warranties of any kind. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ */ -#ifndef _IP_FW_H -#define _IP_FW_H - -#include +#ifndef _IPFW2_H +#define _IPFW2_H /* - * This union structure identifies an interface, either explicitly - * by name or implicitly by IP address. The flags IP_FW_F_IIFNAME - * and IP_FW_F_OIFNAME say how to interpret this structure. An - * interface unit number of -1 matches any unit number, while an - * IP address of 0.0.0.0 indicates matches any interface. + * The kernel representation of ipfw rules is made of a list of + * 'instructions' (for all practical purposes equivalent to BPF + * instructions), which specify which fields of the packet + * (or its metadata) should be analysed. * - * The receive and transmit interfaces are only compared against the - * the packet if the corresponding bit (IP_FW_F_IIFACE or IP_FW_F_OIFACE) - * is set. Note some packets lack a receive or transmit interface - * (in which case the missing "interface" never matches). + * Each instruction is stored in a structure which begins with + * "ipfw_insn", and can contain extra fields depending on the + * instruction type (listed below). + * + * "enum ipfw_opcodes" are the opcodes supported. We can have up + * to 256 different opcodes. */ -union ip_fw_if { - struct in_addr fu_via_ip; /* Specified by IP address */ - struct { /* Specified by interface name */ -#define FW_IFNLEN 10 /* need room ! 
was IFNAMSIZ */ - char name[FW_IFNLEN]; - short unit; /* -1 means match any unit */ - } fu_via_if; +enum ipfw_opcodes { /* arguments (4 byte each) */ + O_NOP, + + O_IP_SRC, /* u32 = IP */ + O_IP_SRC_MASK, /* ip = IP/mask */ + O_IP_SRC_ME, /* none */ + O_IP_SRC_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_DST, /* u32 = IP */ + O_IP_DST_MASK, /* ip = IP/mask */ + O_IP_DST_ME, /* none */ + O_IP_DST_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_SRCPORT, /* (n)port list:mask 4 byte ea */ + O_IP_DSTPORT, /* (n)port list:mask 4 byte ea */ + O_PROTO, /* arg1=protocol */ + + O_MACADDR2, /* 2 mac addr:mask */ + O_MAC_TYPE, /* same as srcport */ + + O_LAYER2, /* none */ + O_IN, /* none */ + O_FRAG, /* none */ + + O_RECV, /* none */ + O_XMIT, /* none */ + O_VIA, /* none */ + + O_IPOPT, /* arg1 = 2*u8 bitmap */ + O_IPLEN, /* arg1 = len */ + O_IPID, /* arg1 = id */ + + O_IPPRE, /* arg1 = id */ + O_IPTOS, /* arg1 = id */ + O_IPTTL, /* arg1 = TTL */ + + O_IPVER, /* arg1 = version */ + O_UID, /* u32 = id */ + O_GID, /* u32 = id */ + O_ESTAB, /* none (tcp established) */ + O_TCPFLAGS, /* arg1 = 2*u8 bitmap */ + O_TCPWIN, /* arg1 = desired win */ + O_TCPSEQ, /* u32 = desired seq. */ + O_TCPACK, /* u32 = desired seq. */ + O_ICMPTYPE, /* u32 = icmp bitmap */ + O_TCPOPTS, /* arg1 = 2*u8 bitmap */ + O_IPOPTS, /* arg1 = 2*u8 bitmap */ + + O_PROBE_STATE, /* none */ + O_KEEP_STATE, /* none */ + O_LIMIT, /* ipfw_insn_limit */ + O_LIMIT_PARENT, /* dyn_type, not an opcode. */ + /* + * these are really 'actions', and must be last in the list. 
+ */ + + O_LOG, /* ipfw_insn_log */ + O_PROB, /* u32 = match probability */ + + O_CHECK_STATE, /* none */ + O_ACCEPT, /* none */ + O_DENY, /* none */ + O_REJECT, /* arg1=icmp arg (same as deny) */ + O_COUNT, /* none */ + O_SKIPTO, /* arg1=next rule number */ + O_PIPE, /* arg1=pipe number */ + O_QUEUE, /* arg1=queue number */ + O_DIVERT, /* arg1=port number */ + O_TEE, /* arg1=port number */ + O_FORWARD_IP, /* fwd sockaddr */ + O_FORWARD_MAC, /* fwd mac */ + O_LAST_OPCODE /* not an opcode! */ }; /* - * Format of an IP firewall descriptor + * Template for instructions. + * + * ipfw_insn is used for all instructions which require no operands, + * a single 16-bit value (arg1), or a couple of 8-bit values. + * + * For other instructions which require different/larger arguments + * we have derived structures, ipfw_insn_*. + * + * The size of the instruction (in 32-bit words) is in the low + * 6 bits of "len". The 2 remaining bits are used to implement + * NOT and OR on individual instructions. Given a type, you can + * compute the length to be put in "len" using F_INSN_SIZE(t) + * + * F_NOT negates the match result of the instruction. + * + * F_OR is used to build or blocks. By default, instructions + * are evaluated as part of a logical AND. An "or" block + * { X or Y or Z } contains F_OR set in all but the last + * instruction of the block. A match will cause the code + * to skip past the last instruction of the block. + * + * NOTA BENE: in a couple of places we assume that + * sizeof(ipfw_insn) == sizeof(u_int32_t) + * this needs to be fixed. * - * fw_src, fw_dst, fw_smsk, fw_dmsk are always stored in network byte order. - * fw_flg and fw_n*p are stored in host byte order (of course). - * Port numbers are stored in HOST byte order. 
*/ +typedef struct _ipfw_insn { /* template for instructions */ + enum ipfw_opcodes opcode:8; + u_int8_t len; /* low 6 bits: size in 32-bit words; top 2 bits: F_NOT, F_OR */ +#define F_NOT 0x80 +#define F_OR 0x40 +#define F_LEN_MASK 0x3f +#define F_LEN(cmd) ((cmd)->len & F_LEN_MASK) + + u_int16_t arg1; +} ipfw_insn; /* - * To match MAC headers: - * 12 bytes at fw_mac_hdr contain the dst-src MAC address after masking. - * 12 bytes at fw_mac_mask contain the mask to apply to dst-src - * 2 bytes at fw_mac_type contain the mac type after mask (in net format) - * 2 bytes at fw_mac_type_mask contain the mac type mask - * If IP_FW_F_SRNG, the two contain the low-high of a range of types. - * IP_FW_F_DRNG is used to indicare we want to match a vlan. + * The F_INSN_SIZE(type) computes the size, in 4-byte words, of + * a given type. */ -#define fw_mac_hdr fw_src -#define fw_mac_mask fw_uar -#define fw_mac_type fw_iplen -#define fw_mac_mask_type fw_ipid +#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t)) -struct ip_fw { - LIST_ENTRY(ip_fw) next; /* bidirectional list of rules */ - u_int fw_flg; /* Operational Flags word */ - u_int64_t fw_pcnt; /* Packet counters */ - u_int64_t fw_bcnt; /* Byte counters */ +/* + * This is used to store an array of 16-bit entries (ports etc.) 
+ */ +typedef struct _ipfw_insn_u16 { + ipfw_insn o; + u_int16_t ports[2]; /* there may be more */ +} ipfw_insn_u16; - struct in_addr fw_src; /* Source IP address */ - struct in_addr fw_dst; /* Destination IP address */ - struct in_addr fw_smsk; /* Mask for source IP address */ - struct in_addr fw_dmsk; /* Mask for destination address */ - u_short fw_number; /* Rule number */ - u_char fw_prot; /* IP protocol */ -#if 1 - u_char fw_nports; /* # of src/dst port in array */ -#define IP_FW_GETNSRCP(rule) ((rule)->fw_nports & 0x0f) -#define IP_FW_SETNSRCP(rule, n) do { \ - (rule)->fw_nports &= ~0x0f; \ - (rule)->fw_nports |= (n); \ - } while (0) -#define IP_FW_GETNDSTP(rule) ((rule)->fw_nports >> 4) -#define IP_FW_SETNDSTP(rule, n) do { \ - (rule)->fw_nports &= ~0xf0; \ - (rule)->fw_nports |= (n) << 4;\ - } while (0) -#define IP_FW_HAVEPORTS(rule) ((rule)->fw_nports != 0) -#else - u_char __pad[1]; - u_int _nsrcp; - u_int _ndstp; -#define IP_FW_GETNSRCP(rule) (rule)->_nsrcp -#define IP_FW_SETNSRCP(rule,n) (rule)->_nsrcp = n -#define IP_FW_GETNDSTP(rule) (rule)->_ndstp -#define IP_FW_SETNDSTP(rule,n) (rule)->_ndstp = n -#define IP_FW_HAVEPORTS(rule) ((rule)->_ndstp + (rule)->_nsrcp != 0) -#endif -#define IP_FW_MAX_PORTS 10 /* A reasonable maximum */ - union { - u_short fw_pts[IP_FW_MAX_PORTS]; /* port numbers to match */ -#define IP_FW_ICMPTYPES_MAX 128 -#define IP_FW_ICMPTYPES_DIM (IP_FW_ICMPTYPES_MAX / (sizeof(unsigned) * 8)) - unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM]; /*ICMP types bitmap*/ - } fw_uar; +/* + * This is used to store an array of 32-bit entries + * (uid, single IPv4 addresses etc.) 
+ */ +typedef struct _ipfw_insn_u32 { + ipfw_insn o; + u_int32_t d[1]; /* one or more */ +} ipfw_insn_u32; - u_int fw_ipflg; /* IP flags word */ - u_short fw_iplen; /* IP length */ - u_short fw_ipid; /* Identification */ - u_char fw_ipopt; /* IP options set */ - u_char fw_ipnopt; /* IP options unset */ - u_char fw_iptos; /* IP type of service set */ - u_char fw_ipntos; /* IP type of service unset */ - u_char fw_ipttl; /* IP time to live */ - u_int fw_ipver:4; /* IP version */ - u_char fw_tcpopt; /* TCP options set */ - u_char fw_tcpnopt; /* TCP options unset */ - u_char fw_tcpf; /* TCP flags set */ - u_char fw_tcpnf; /* TCP flags unset */ - u_short fw_tcpwin; /* TCP window size */ - u_int32_t fw_tcpseq; /* TCP sequence */ - u_int32_t fw_tcpack; /* TCP acknowledgement */ - long timestamp; /* timestamp (tv_sec) of last match */ - union ip_fw_if fw_in_if; /* Incoming interfaces */ - union ip_fw_if fw_out_if; /* Outgoing interfaces */ - union { - u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */ - u_short fu_pipe_nr; /* queue number (option DUMMYNET) */ - u_short fu_skipto_rule; /* SKIPTO command rule number */ - u_short fu_reject_code; /* REJECT response code */ - struct sockaddr_in fu_fwd_ip; - } fw_un; - void *pipe_ptr; /* flow_set ptr for dummynet pipe */ - void *next_rule_ptr; /* next rule in case of match */ - uid_t fw_uid; /* uid to match */ - gid_t fw_gid; /* gid to match */ - int fw_logamount; /* amount to log */ - u_int64_t fw_loghighest; /* highest number packet to log */ +/* + * This is used to store IP addr-mask pairs. 
+ */ +typedef struct _ipfw_insn_ip { + ipfw_insn o; + struct in_addr addr; + struct in_addr mask; +} ipfw_insn_ip; - long dont_match_prob; /* 0x7fffffff means 1.0, always fail */ - u_char dyn_type; /* type for dynamic rule */ +/* + * This is used to forward to a given address (ip) + */ +typedef struct _ipfw_insn_sa { + ipfw_insn o; + struct sockaddr_in sa; +} ipfw_insn_sa; -#define DYN_KEEP_STATE 0 /* type for keep-state rules */ -#define DYN_LIMIT 1 /* type for limit connection rules */ -#define DYN_LIMIT_PARENT 2 /* parent entry for limit connection rules */ +/* + * This is used for MAC addr-mask pairs. + */ +typedef struct _ipfw_insn_mac { + ipfw_insn o; + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ +} ipfw_insn_mac; - /* following two fields are used to limit number of connections - * basing on either src, srcport, dst, dstport. - */ - u_char limit_mask; /* mask type for limit rule, can - * have many. - */ +/* + * This is used for interface match rules (recv xx, xmit xx) + */ +typedef struct _ipfw_insn_if { + ipfw_insn o; + union { + struct in_addr ip; + int unit; + } p; + char name[IFNAMSIZ]; +} ipfw_insn_if; + +/* + * This is used for pipe and queue actions, which need to store + * a single pointer (which can have different size on different + * architectures). + */ +typedef struct _ipfw_insn_pipe { + ipfw_insn o; + void *pipe_ptr; +} ipfw_insn_pipe; + +/* + * This is used for limit rules. 
+ */ +typedef struct _ipfw_insn_limit { + ipfw_insn o; + u_int8_t _pad; + u_int8_t limit_mask; /* combination of DYN_* below */ #define DYN_SRC_ADDR 0x1 #define DYN_SRC_PORT 0x2 #define DYN_DST_ADDR 0x4 #define DYN_DST_PORT 0x8 - u_short conn_limit; /* # of connections for limit rule */ -}; - -#define fw_divert_port fw_un.fu_divert_port -#define fw_skipto_rule fw_un.fu_skipto_rule -#define fw_reject_code fw_un.fu_reject_code -#define fw_pipe_nr fw_un.fu_pipe_nr -#define fw_fwd_ip fw_un.fu_fwd_ip + u_int16_t conn_limit; +} ipfw_insn_limit; /* + * This is used for log instructions + */ +typedef struct _ipfw_insn_log { + ipfw_insn o; + u_int32_t max_log; /* how many do we log -- 0 = all */ + u_int32_t log_left; /* how many left to log */ +} ipfw_insn_log; + +/* + * Here we have the structure representing an ipfw rule. * - * rule_ptr -------------+ - * V - * [ next.le_next ]---->[ next.le_next ]---- [ next.le_next ]---> - * [ next.le_prev ]<----[ next.le_prev ]<----[ next.le_prev ]<--- - * [ body ] [ body ] [ body ] + * It starts with a general area (with link fields and counters) + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). + * + * When assembling instruction, remember the following: + * + * + if a rule has a "keep-state" (or "limit") option, then the + * first instruction (at r->cmd) MUST BE an O_PROBE_STATE + * + if a rule has a "log" option, then the first action + * (at ACTION_PTR(r)) MUST be O_LOG + * + * NOTE: we use a simple linked list of rules because we never need + * to delete a rule without scanning the list. We do not use + * queue(3) macros for portability and readability. 
*/ +struct ip_fw { + struct ip_fw *next; /* linked list of rules */ + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int16_t _pad; /* padding */ + + /* These fields are present in all rules. */ + u_int64_t pcnt; /* Packet counter */ + u_int64_t bcnt; /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + struct ip_fw *next_rule; /* ptr to next rule */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#define ACTION_PTR(rule) \ + (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) + +#define RULESIZE(rule) (sizeof(struct ip_fw) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + /* - * Flow mask/flow id for each queue. + * This structure is used as a flow mask and a flow id for various + * parts of the code. */ struct ipfw_flow_id { u_int32_t dst_ip; @@ -191,107 +313,24 @@ struct ipfw_flow_id { /* * dynamic ipfw rule */ -struct ipfw_dyn_rule { - struct ipfw_dyn_rule *next; - struct ipfw_flow_id id; /* (masked) flow id */ - struct ip_fw *rule; /* pointer to rule */ - struct ipfw_dyn_rule *parent; /* pointer to parent rule */ - u_int32_t expire; /* expire time */ - u_int64_t pcnt; /* packet match counters */ - u_int64_t bcnt; /* byte match counters */ - u_int32_t bucket; /* which bucket in hash table */ +typedef struct _ipfw_dyn_rule ipfw_dyn_rule; + +struct _ipfw_dyn_rule { + ipfw_dyn_rule *next; /* linked list of rules. 
*/ + struct ipfw_flow_id id; /* (masked) flow id */ + struct ip_fw *rule; /* pointer to rule */ + ipfw_dyn_rule *parent; /* pointer to parent rule */ + u_int32_t expire; /* expire time */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + u_int32_t bucket; /* which bucket in hash table */ u_int32_t state; /* state of this rule (typically a * combination of TCP flags) */ - u_int16_t dyn_type; /* rule type */ - u_int16_t count; /* refcount */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ }; -/* - * Values for "flags" field . - */ -#define IP_FW_F_COMMAND 0x000000ff /* Mask for type of chain entry: */ -#define IP_FW_F_DENY 0x00000000 /* This is a deny rule */ -#define IP_FW_F_REJECT 0x00000001 /* Deny and send a response packet */ -#define IP_FW_F_ACCEPT 0x00000002 /* This is an accept rule */ -#define IP_FW_F_COUNT 0x00000003 /* This is a count rule */ -#define IP_FW_F_DIVERT 0x00000004 /* This is a divert rule */ -#define IP_FW_F_TEE 0x00000005 /* This is a tee rule */ -#define IP_FW_F_SKIPTO 0x00000006 /* This is a skipto rule */ -#define IP_FW_F_FWD 0x00000007 /* This is a "change forwarding - * address" rule - */ -#define IP_FW_F_PIPE 0x00000008 /* This is a dummynet rule */ -#define IP_FW_F_QUEUE 0x00000009 /* This is a dummynet queue */ - -#define IP_FW_F_IN 0x00000100 /* Check inbound packets */ -#define IP_FW_F_OUT 0x00000200 /* Check outbound packets */ -#define IP_FW_F_IIFACE 0x00000400 /* Apply inbound interface test */ -#define IP_FW_F_OIFACE 0x00000800 /* Apply outbound interface test */ -#define IP_FW_F_PRN 0x00001000 /* Print if this rule matches */ -#define IP_FW_F_SRNG 0x00002000 /* The first two src ports are a min - * and max range (stored in host byte - * order). - */ -#define IP_FW_F_DRNG 0x00004000 /* The first two dst ports are a min - * and max range (stored in host byte - * order). 
- */ -#define IP_FW_F_FRAG 0x00008000 /* Fragment */ -#define IP_FW_F_IIFNAME 0x00010000 /* In interface by name/unit (not IP) */ -#define IP_FW_F_OIFNAME 0x00020000 /* Out interface by name/unit (not IP)*/ -#define IP_FW_F_INVSRC 0x00040000 /* Invert sense of src check */ -#define IP_FW_F_INVDST 0x00080000 /* Invert sense of dst check */ -#define IP_FW_F_ICMPBIT 0x00100000 /* ICMP type bitmap is valid */ -#define IP_FW_F_UID 0x00200000 /* filter by uid */ -#define IP_FW_F_GID 0x00400000 /* filter by gid */ -#define IP_FW_F_RND_MATCH 0x00800000 /* probabilistic rule match */ -#define IP_FW_F_SMSK 0x01000000 /* src-port + mask */ -#define IP_FW_F_DMSK 0x02000000 /* dst-port + mask */ -#define IP_FW_BRIDGED 0x04000000 /* only match bridged packets */ -#define IP_FW_F_KEEP_S 0x08000000 /* keep state */ -#define IP_FW_F_CHECK_S 0x10000000 /* check state */ -#define IP_FW_F_SME 0x20000000 /* source = me */ -#define IP_FW_F_DME 0x40000000 /* destination = me */ -#define IP_FW_F_MAC 0x80000000 /* match MAC header */ - -#define IP_FW_F_MASK 0xFFFFFFFF /* All possible flag bits mask */ - -/* - * Flags for the 'fw_ipflg' field, for comparing values - * of ip and its protocols. 
- */ -#define IP_FW_IF_TCPOPT 0x00000001 /* tcp options */ -#define IP_FW_IF_TCPFLG 0x00000002 /* tcp flags */ -#define IP_FW_IF_TCPSEQ 0x00000004 /* tcp sequence number */ -#define IP_FW_IF_TCPACK 0x00000008 /* tcp acknowledgement number */ -#define IP_FW_IF_TCPWIN 0x00000010 /* tcp window size */ -#define IP_FW_IF_TCPEST 0x00000020 /* established TCP connection */ -#define IP_FW_IF_TCPMSK 0x0000003f /* mask of all tcp values */ -#define IP_FW_IF_IPOPT 0x00000100 /* ip options */ -#define IP_FW_IF_IPLEN 0x00000200 /* ip length */ -#define IP_FW_IF_IPID 0x00000400 /* ip identification */ -#define IP_FW_IF_IPTOS 0x00000800 /* ip type of service */ -#define IP_FW_IF_IPTTL 0x00001000 /* ip time to live */ -#define IP_FW_IF_IPVER 0x00002000 /* ip version */ -#define IP_FW_IF_IPPRE 0x00004000 /* ip precedence */ -#define IP_FW_IF_IPMSK 0x00007f00 /* mask of all ip values */ -#define IP_FW_IF_MSK 0x0000ffff /* All possible bits mask */ - -/* - * For backwards compatibility with rules specifying "via iface" but - * not restricted to only "in" or "out" packets, we define this combination - * of bits to represent this configuration. - */ - -#define IF_FW_F_VIAHACK (IP_FW_F_IN|IP_FW_F_OUT|IP_FW_F_IIFACE|IP_FW_F_OIFACE) - -/* - * Definitions for REJECT response codes. - * Values less than 256 correspond to ICMP unreachable codes. - */ -#define IP_FW_REJECT_RST 0x0100 /* TCP packets: send RST */ - /* * Definitions for IP option names. */ @@ -309,6 +348,8 @@ struct ipfw_dyn_rule { #define IP_FW_TCPOPT_TS 0x08 #define IP_FW_TCPOPT_CC 0x10 +#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ + /* * Main firewall chains definitions and global var's definitions. */ @@ -319,7 +360,7 @@ struct ipfw_dyn_rule { #define IP_FW_PORT_DENY_FLAG 0x40000 /* - * arguments for calling ip_fw_chk() and dummynet_io(). We put them + * arguments for calling ipfw_chk() and dummynet_io(). 
We put them * all into a structure because this way it is easier and more * efficient to pass variables around and extend the interface. */ @@ -342,11 +383,13 @@ struct ip_fw_args { /* * Function definitions. */ -void ip_fw_init(void); /* Firewall hooks */ -struct ip; struct sockopt; +struct dn_flow_set; + +void flush_pipe_ptrs(struct dn_flow_set *match); /* used by dummynet */ + typedef int ip_fw_chk_t (struct ip_fw_args *args); typedef int ip_fw_ctl_t (struct sockopt *); extern ip_fw_chk_t *ip_fw_chk_ptr; @@ -356,4 +399,4 @@ extern int fw_enable; #define IPFW_LOADED (ip_fw_chk_ptr != NULL) #endif /* _KERNEL */ -#endif /* _IP_FW_H */ +#endif /* _IPFW2_H */ diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c new file mode 100644 index 000000000000..194f62895f48 --- /dev/null +++ b/sys/netinet/ip_fw2.c @@ -0,0 +1,2519 @@ +/* + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define DEB(x) +#define DDB(x) x + +/* + * Implement IP packet firewall + */ + +#if !defined(KLD_MODULE) +#include "opt_ipfw.h" +#include "opt_ipdn.h" +#include "opt_ipdivert.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* XXX for ETHERTYPE_IP */ + +static int fw_verbose = 0; +static int verbose_limit = 0; + +#define IPFW_DEFAULT_RULE 65535 + +/* + * list of rules for layer 3 + */ +static struct ip_fw *layer3_chain; + +MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); + +static int fw_debug = 1; +int fw_one_pass = 1; +static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ + +#ifdef SYSCTL_NODE +SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, CTLFLAG_RW, + &fw_enable, 0, "Enable ipfw"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, + &autoinc_step, 0, "Rule number autincrement step"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, + &fw_one_pass, 0, + "Only do a single pass through ipfw when using dummynet(4)"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, 
debug, CTLFLAG_RW, + &fw_debug, 0, "Enable printing of debug ip_fw statements"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, + &fw_verbose, 0, "Log matches to ipfw rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, + &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); + +/* + * Description of dynamic rules. + * + * Dynamic rules are stored in lists accessed through a hash table + * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can + * be modified through the sysctl variable dyn_buckets which is + * updated when the table becomes empty. + * + * XXX currently there is only one list, ipfw_dyn. + * + * When a packet is received, its address fields are first masked + * with the mask defined for the rule, then hashed, then matched + * against the entries in the corresponding list. + * Dynamic rules can be used for different purposes: + * + stateful rules; + * + enforcing limits on the number of sessions; + * + in-kernel NAT (not implemented yet) + * + * The lifetime of dynamic rules is regulated by dyn_*_lifetime, + * measured in seconds and depending on the flags. + * + * The total number of dynamic rules is stored in dyn_count. + * The max number of dynamic rules is dyn_max. When we reach + * the maximum number of rules we do not create anymore. This is + * done to avoid consuming too much memory, but also too much + * time when searching on each packet (ideally, we should try instead + * to put a limit on the length of the list on each bucket...). + * + * Each dynamic rule holds a pointer to the parent ipfw rule so + * we know what action to perform. Dynamic rules are removed when + * the parent rule is deleted. XXX we should make them survive. + * + * There are some limitations with dynamic rules -- we do not + * obey the 'randomized match', and we do not do multiple + * passes through the firewall. XXX check the latter!!! 
+ */ +static ipfw_dyn_rule **ipfw_dyn_v = NULL; +static u_int32_t dyn_buckets = 256; /* must be power of 2 */ +static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ + +/* + * Timeouts for various events in handing dynamic rules. + */ +static u_int32_t dyn_ack_lifetime = 300; +static u_int32_t dyn_syn_lifetime = 20; +static u_int32_t dyn_fin_lifetime = 1; +static u_int32_t dyn_rst_lifetime = 1; +static u_int32_t dyn_udp_lifetime = 10; +static u_int32_t dyn_short_lifetime = 5; + +/* + * After reaching 0, dynamic rules are considered still valid for + * an additional grace time, unless there is lack of resources. + * XXX not implemented yet. + */ +static u_int32_t dyn_grace_time = 10; + +static u_int32_t static_count = 0; /* # of static rules */ +static u_int32_t static_len = 0; /* size in bytes of static rules */ +static u_int32_t dyn_count = 0; /* # of dynamic rules */ +static u_int32_t dyn_max = 1000; /* max # of dynamic rules */ + +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, + &dyn_buckets, 0, "Number of dyn. buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, + &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, + &dyn_count, 0, "Number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, + &dyn_max, 0, "Max number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, + &static_count, 0, "Number of static rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, + &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, + &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, + &dyn_fin_lifetime, 0, "Lifetime of dyn. 
rules for fin"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, + &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, + &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, + &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_grace_time, CTLFLAG_RD, + &dyn_grace_time, 0, "Grace time for dyn. rules"); + +#endif /* SYSCTL_NODE */ + + +static ip_fw_chk_t ipfw_chk; + +ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL; /* hook into dummynet */ + +/* + * This macro maps an ip pointer into a layer3 header pointer of type T + */ +#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) + +static int +icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) +{ + int type = L3HDR(struct icmp,ip)->icmp_type; + + return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<icmp_type; + return (type <= ICMP_MAXTYPE && (TT & (1<arg1 or cmd->d[0]. + * + * We scan options and store the bits we find set. We succeed if + * + * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear + * + * The code is sometimes optimized not to store additional variables. 
+ */ + +static int +flags_match(ipfw_insn *cmd, u_int8_t bits) +{ + u_char want_clear; + bits = ~bits; + + if ( ((cmd->arg1 & 0xff) & bits) != 0) + return 0; /* some bits we want set were clear */ + want_clear = (cmd->arg1 >> 8) & 0xff; + if ( (want_clear & bits) != want_clear) + return 0; /* some bits we want clear were set */ + return 1; +} + +static int +ipopts_match(struct ip *ip, ipfw_insn *cmd) +{ + int optlen, bits = 0; + u_char *cp = (u_char *)(ip + 1); + int x = (ip->ip_hl << 2) - sizeof (struct ip); + + for (; x > 0; x -= optlen, cp += optlen) { + int opt = cp[IPOPT_OPTVAL]; + + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= 0 || optlen > x) + return 0; /* invalid or truncated */ + } + switch (opt) { + + default: + break; + + case IPOPT_LSRR: + bits |= IP_FW_IPOPT_LSRR; + break; + + case IPOPT_SSRR: + bits |= IP_FW_IPOPT_SSRR; + break; + + case IPOPT_RR: + bits |= IP_FW_IPOPT_RR; + break; + + case IPOPT_TS: + bits |= IP_FW_IPOPT_TS; + break; + } + } + return (flags_match(cmd, bits)); +} + +static int +tcpopts_match(struct ip *ip, ipfw_insn *cmd) +{ + int optlen, bits = 0; + struct tcphdr *tcp = L3HDR(struct tcphdr,ip); + u_char *cp = (u_char *)(tcp + 1); + int x = (tcp->th_off << 2) - sizeof(struct tcphdr); + + for (; x > 0; x -= optlen, cp += optlen) { + int opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + + switch (opt) { + + default: + break; + + case TCPOPT_MAXSEG: + bits |= IP_FW_TCPOPT_MSS; + break; + + case TCPOPT_WINDOW: + bits |= IP_FW_TCPOPT_WINDOW; + break; + + case TCPOPT_SACK_PERMITTED: + case TCPOPT_SACK: + bits |= IP_FW_TCPOPT_SACK; + break; + + case TCPOPT_TIMESTAMP: + bits |= IP_FW_TCPOPT_TS; + break; + + case TCPOPT_CC: + case TCPOPT_CCNEW: + case TCPOPT_CCECHO: + bits |= IP_FW_TCPOPT_CC; + break; + } + } + return (flags_match(cmd, bits)); +} + +/* + * XXX done + */ 
+static int +iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) +{ + if (ifp == NULL) /* no iface with this packet, match fails */ + return 0; + /* Check by name or by IP address */ + if (cmd->name[0] != '\0') { /* XXX by name */ + /* Check unit number (-1 is wildcard) */ + if (cmd->p.unit != -1 && cmd->p.unit != ifp->if_unit) + return(0); + /* Check name */ + if (!strncmp(ifp->if_name, cmd->name, IFNAMSIZ)) + return(1); + } else { + struct ifaddr *ia; + + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { + if (ia->ifa_addr == NULL) + continue; + if (ia->ifa_addr->sa_family != AF_INET) + continue; + if (cmd->p.ip.s_addr == ((struct sockaddr_in *) + (ia->ifa_addr))->sin_addr.s_addr) + return(1); /* match */ + } + } + return(0); /* no match, fail ... */ +} + +static u_int64_t norule_counter; /* counter for ipfw_log(NULL...) */ + +#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 +/* + * We enter here when we have a rule with O_LOG. + */ +static void +ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, + struct mbuf *m, struct ifnet *oif) +{ + char *action; + char action2[32], proto[47], fragment[27]; + int limit_reached = 0; + + fragment[0] = '\0'; + proto[0] = '\0'; + + if (f == NULL) { /* bogus pkt */ + if (verbose_limit != 0 && norule_counter >= verbose_limit) + return; + norule_counter++; + if (norule_counter == verbose_limit) + limit_reached = verbose_limit; + action = "Refuse"; + } else { /* O_LOG is the first action, find the real one */ + ipfw_insn *cmd = ACTION_PTR(f); + ipfw_insn_log *l = (ipfw_insn_log *)cmd; + + if (l->max_log != 0 && l->log_left == 0) + return; + l->log_left--; + if (l->log_left == 0) + limit_reached = l->max_log; + cmd += F_LEN(cmd); /* point to first action */ + if (cmd->opcode == O_PROB) + cmd += F_LEN(cmd); + + action = action2; + switch (cmd->opcode) { + case O_DENY: + action = "Deny"; + break; + case O_REJECT: + action = (cmd->arg1==ICMP_REJECT_RST) ? 
+ "Reset" : "Unreach"; + break; + case O_ACCEPT: + action = "Accept"; + break; + case O_COUNT: + action = "Count"; + break; + case O_DIVERT: + snprintf(SNPARGS(action2, 0), "Divert %d", + cmd->arg1); + break; + case O_TEE: + snprintf(SNPARGS(action2, 0), "Tee %d", + cmd->arg1); + break; + case O_SKIPTO: + snprintf(SNPARGS(action2, 0), "SkipTo %d", + cmd->arg1); + break; + case O_PIPE: + snprintf(SNPARGS(action2, 0), "Pipe %d", + cmd->arg1); + break; + case O_QUEUE: + snprintf(SNPARGS(action2, 0), "Queue %d", + cmd->arg1); + break; + case O_FORWARD_IP: { + ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; + + if (sa->sa.sin_port) + snprintf(SNPARGS(action2, 0), + "Forward to %s:%d", + inet_ntoa(sa->sa.sin_addr), + sa->sa.sin_port); + else + snprintf(SNPARGS(action2, 0), "Forward to %s", + inet_ntoa(sa->sa.sin_addr)); + } + break; + default: + action = "UNKNOWN"; + break; + } + } + + if (hlen == 0) { /* non-ip */ + snprintf(SNPARGS(proto, 0), "MAC"); + } else { + struct ip *ip = mtod(m, struct ip *); + /* these three are all aliases to the same thing */ + struct icmp *const icmp = L3HDR(struct icmp, ip); + struct tcphdr *const tcp = (struct tcphdr *)icmp; + struct udphdr *const udp = (struct udphdr *)icmp; + + int ip_off, offset, ip_len; + + int len; + + if (eh != NULL) { /* layer 2 packets are as on the wire */ + ip_off = ntohs(ip->ip_off); + ip_len = ntohs(ip->ip_len); + } else { + ip_off = ip->ip_off; + ip_len = ip->ip_len; + } + offset = ip_off & IP_OFFMASK; + switch (ip->ip_p) { + case IPPROTO_TCP: + len = snprintf(SNPARGS(proto, 0), "TCP %s", + inet_ntoa(ip->ip_src)); + if (offset == 0) + len += snprintf(SNPARGS(proto, len), ":%d ", + ntohs(tcp->th_sport)); + else + len += snprintf(SNPARGS(proto, len), " "); + len += snprintf(SNPARGS(proto, len), "%s", + inet_ntoa(ip->ip_dst)); + if (offset == 0) + snprintf(SNPARGS(proto, len), ":%d", + ntohs(tcp->th_dport)); + break; + + case IPPROTO_UDP: + len = snprintf(SNPARGS(proto, 0), "UDP %s", + inet_ntoa(ip->ip_src)); + if 
(offset == 0) + len += snprintf(SNPARGS(proto, len), ":%d ", + ntohs(udp->uh_sport)); + else + len += snprintf(SNPARGS(proto, len), " "); + len += snprintf(SNPARGS(proto, len), "%s", + inet_ntoa(ip->ip_dst)); + if (offset == 0) + snprintf(SNPARGS(proto, len), ":%d", + ntohs(udp->uh_dport)); + break; + + case IPPROTO_ICMP: + if (offset == 0) + len = snprintf(SNPARGS(proto, 0), + "ICMP:%u.%u ", + icmp->icmp_type, icmp->icmp_code); + else + len = snprintf(SNPARGS(proto, 0), "ICMP "); + len += snprintf(SNPARGS(proto, len), "%s", + inet_ntoa(ip->ip_src)); + snprintf(SNPARGS(proto, len), " %s", + inet_ntoa(ip->ip_dst)); + break; + + default: + len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, + inet_ntoa(ip->ip_src)); + snprintf(SNPARGS(proto, len), " %s", + inet_ntoa(ip->ip_dst)); + break; + } + + if (ip_off & (IP_MF | IP_OFFMASK)) + snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", + ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), + offset << 3, + (ip_off & IP_MF) ? "+" : ""); + } + if (oif || m->m_pkthdr.rcvif) + log(LOG_SECURITY | LOG_INFO, + "ipfw: %d %s %s %s via %s%d%s\n", + f ? f->rulenum : -1, + action, proto, oif ? "out" : "in", + oif ? oif->if_name : m->m_pkthdr.rcvif->if_name, + oif ? oif->if_unit : m->m_pkthdr.rcvif->if_unit, + fragment); + else + log(LOG_SECURITY | LOG_INFO, + "ipfw: %d %s %s [no if info]%s\n", + f ? f->rulenum : -1, + action, proto, fragment); + if (limit_reached) + log(LOG_SECURITY | LOG_NOTICE, + "ipfw: limit %d reached on entry %d\n", + limit_reached, f ? f->rulenum : -1); +} + +/* + * IMPORTANT: the hash function for dynamic rules must be commutative + * in * source and destination (ip,port), because rules are bidirectional + * and we want to find both in the same bucket. + */ +static __inline int +hash_packet(struct ipfw_flow_id *id) +{ + u_int32_t i; + + i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); + i &= (curr_dyn_buckets - 1); + return i; +} + +/** + * unlink a dynamic rule from a chain. 
prev is a pointer to + * the previous one, q is a pointer to the rule to delete, + * head is a pointer to the head of the queue. + * Modifies q and potentially also head. + */ +#define UNLINK_DYN_RULE(prev, head, q) { \ + ipfw_dyn_rule *old_q = q; \ + \ + /* remove a refcount to the parent */ \ + if (q->dyn_type == O_LIMIT) \ + q->parent->count--; \ + DEB(printf("-- unlink entry 0x%08x %d -> 0x%08x %d, %d left\n", \ + (q->id.src_ip), (q->id.src_port), \ + (q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); ) \ + if (prev != NULL) \ + prev->next = q = q->next; \ + else \ + head = q = q->next; \ + dyn_count--; \ + free(old_q, M_IPFW); } + +#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) + +/** + * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. + * + * If keep_me == NULL, rules are deleted even if not expired, + * otherwise only expired rules are removed. + * + * The value of the second parameter is also used to point to identify + * a rule we absolutely do not want to remove (e.g. because we are + * holding a reference to it -- this is the case with O_LIMIT_PARENT + * rules). The pointer is only used for comparison, so any non-null + * value will do. + */ +static void +remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) +{ + static u_int32_t last_remove = 0; + +#define FORCE (keep_me == NULL) + + ipfw_dyn_rule *prev, *q; + int i, pass = 0, max_pass = 0; + + if (ipfw_dyn_v == NULL || dyn_count == 0) + return; + /* do not expire more than once per second, it is useless */ + if (!FORCE && last_remove == time_second) + return; + last_remove = time_second; + + /* + * because O_LIMIT refer to parent rules, during the first pass only + * remove child and mark any pending LIMIT_PARENT, and remove + * them in a second pass. + */ +next_pass: + for (i = 0 ; i < curr_dyn_buckets ; i++) { + for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { + /* + * Logic can become complex here, so we split tests. 
+ */ + if (q == keep_me) + goto next; + if (rule != NULL && rule != q->rule) + goto next; /* not the one we are looking for */ + if (q->dyn_type == O_LIMIT_PARENT) { + /* + * handle parent in the second pass, + * record we need one. + */ + max_pass = 1; + if (pass == 0) + goto next; + if (FORCE && q->count != 0 ) { + /* XXX should not happen! */ + printf( "OUCH! cannot remove rule," + " count %d\n", q->count); + } + } else { + if (!FORCE && + !TIME_LEQ( q->expire, time_second )) + goto next; + } + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; +next: + prev=q; + q=q->next; + } + } + if (pass++ < max_pass) + goto next_pass; +} + + +/** + * lookup a dynamic rule. + */ +static ipfw_dyn_rule * +lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction) +{ + /* + * stateful ipfw extensions. + * Lookup into dynamic session queue + */ +#define MATCH_REVERSE 0 +#define MATCH_FORWARD 1 +#define MATCH_NONE 2 +#define MATCH_UNKNOWN 3 + int i, dir = MATCH_NONE; + ipfw_dyn_rule *prev, *q=NULL; + + if (ipfw_dyn_v == NULL) + goto done; /* not found */ + i = hash_packet( pkt ); + for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { + if (q->dyn_type == O_LIMIT_PARENT) + goto next; + if (TIME_LEQ( q->expire, time_second)) { /* expire entry */ + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; + } + if ( pkt->proto == q->id.proto) { + if (pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port ) { + dir = MATCH_FORWARD; + break; + } + if (pkt->src_ip == q->id.dst_ip && + pkt->dst_ip == q->id.src_ip && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port ) { + dir = MATCH_REVERSE; + break; + } + } +next: + prev = q; + q = q->next; + } + if (q == NULL) + goto done; /* q = NULL, not found */ + + if ( prev != NULL) { /* found and not in front */ + prev->next = q->next; + q->next = ipfw_dyn_v[i]; + ipfw_dyn_v[i] = q; + } + if (pkt->proto == IPPROTO_TCP) { /* update state 
according to flags */ + u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); + +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) + q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); + switch (q->state) { + case TH_SYN: /* opening */ + q->expire = time_second + dyn_syn_lifetime; + break; + case BOTH_SYN: /* move to established */ + q->expire = time_second + dyn_ack_lifetime; + break; + case BOTH_SYN | TH_FIN : /* one side tries to close */ + case BOTH_SYN | (TH_FIN << 8) : + q->expire = time_second + dyn_ack_lifetime; + break; + case BOTH_SYN | BOTH_FIN: /* both sides closed */ + q->expire = time_second + dyn_fin_lifetime; + break; + default: +#if 0 + /* + * reset or some invalid combination, but can also + * occur if we use keep-state the wrong way. + */ + if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) + printf("invalid state: 0x%x\n", q->state); +#endif + q->expire = time_second + dyn_rst_lifetime; + break; + } + } else if (pkt->proto == IPPROTO_UDP) { + q->expire = time_second + dyn_udp_lifetime; + } else { + /* other protocols */ + q->expire = time_second + dyn_short_lifetime; + } +done: + if (match_direction) + *match_direction = dir; + return q; +} + +static void +realloc_dynamic_table(void) +{ + /* try reallocation, make sure we have a power of 2 */ + + if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ + dyn_buckets = curr_dyn_buckets; /* reset */ + return; + } + curr_dyn_buckets = dyn_buckets; + if (ipfw_dyn_v != NULL) + free(ipfw_dyn_v, M_IPFW); + ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), + M_IPFW, M_DONTWAIT | M_ZERO); +} + +/** + * Install state of type 'type' for a dynamic session. + * The hash table contains two type of rules: + * - regular rules (O_KEEP_STATE) + * - rules for sessions with limited number of sess per user + * (O_LIMIT). When they are created, the parent is + * increased by 1, and decreased on delete. 
In this case, + * the third parameter is the parent rule and not the chain. + * - "parent" rules for the above (O_LIMIT_PARENT). + */ +static ipfw_dyn_rule * +add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) +{ + ipfw_dyn_rule *r; + int i; + + if (ipfw_dyn_v == NULL || + (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { + realloc_dynamic_table(); + if (ipfw_dyn_v == NULL) + return NULL; /* failed ! */ + } + i = hash_packet(id); + + r = malloc(sizeof *r, M_IPFW, M_DONTWAIT | M_ZERO); + if (r == NULL) { + printf ("sorry cannot allocate state\n"); + return NULL; + } + + /* increase refcount on parent, and set pointer */ + if (dyn_type == O_LIMIT) { + ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; + if ( parent->dyn_type != O_LIMIT_PARENT) + panic("invalid parent"); + parent->count++; + r->parent = parent; + rule = parent->rule; + } + + r->id = *id; + r->expire = time_second + dyn_syn_lifetime; + r->rule = rule; + r->dyn_type = dyn_type; + r->pcnt = r->bcnt = 0; + r->count = 0; + + r->bucket = i; + r->next = ipfw_dyn_v[i]; + ipfw_dyn_v[i] = r; + dyn_count++; + DEB(printf("-- add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", + dyn_type, + (r->id.src_ip), (r->id.src_port), + (r->id.dst_ip), (r->id.dst_port), + dyn_count ); ) + return r; +} + +/** + * lookup dynamic parent rule using pkt and rule as search keys. + * If the lookup fails, then install one. 
+ */ +static ipfw_dyn_rule * +lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) +{ + ipfw_dyn_rule *q; + int i; + + if (ipfw_dyn_v) { + i = hash_packet( pkt ); + for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) + if (q->dyn_type == O_LIMIT_PARENT && + rule== q->rule && + pkt->proto == q->id.proto && + pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + q->expire = time_second + dyn_short_lifetime; + DEB(printf("lookup_dyn_parent found 0x%p\n",q);) + return q; + } + } + return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); +} + +/** + * Install dynamic state for rule type cmd->o.opcode + * + * Returns 1 (failure) if state is not installed because of errors or because + * session limitations are enforced. + */ +static int +install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, + struct ip_fw_args *args) +{ + static int last_log; + + ipfw_dyn_rule *q; + + DEB(printf("-- install state type %d 0x%08x %u -> 0x%08x %u\n", + cmd->o.opcode, + (args->f_id.src_ip), (args->f_id.src_port), + (args->f_id.dst_ip), (args->f_id.dst_port) );) + + q = lookup_dyn_rule(&args->f_id, NULL); + + if (q != NULL) { /* should never occur */ + if (last_log != time_second) { + last_log = time_second; + printf(" install_state: entry already present, done\n"); + } + return 0; + } + + if (dyn_count >= dyn_max) + /* + * Run out of slots, try to remove any expired rule. 
+ */ + remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); + + if (dyn_count >= dyn_max) { + if (last_log != time_second) { + last_log = time_second; + printf("install_state: Too many dynamic rules\n"); + } + return 1; /* cannot install, notify caller */ + } + + switch (cmd->o.opcode) { + case O_KEEP_STATE: /* bidir rule */ + add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); + break; + + case O_LIMIT: /* limit number of sessions */ + { + u_int16_t limit_mask = cmd->limit_mask; + struct ipfw_flow_id id; + ipfw_dyn_rule *parent; + + DEB(printf("installing dyn-limit rule %d\n", cmd->conn_limit);) + + id.dst_ip = id.src_ip = 0; + id.dst_port = id.src_port = 0; + id.proto = args->f_id.proto; + + if (limit_mask & DYN_SRC_ADDR) + id.src_ip = args->f_id.src_ip; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip = args->f_id.dst_ip; + if (limit_mask & DYN_SRC_PORT) + id.src_port = args->f_id.src_port; + if (limit_mask & DYN_DST_PORT) + id.dst_port = args->f_id.dst_port; + parent = lookup_dyn_parent(&id, rule); + if (parent == NULL) { + printf("add parent failed\n"); + return 1; + } + if (parent->count >= cmd->conn_limit) { + /* + * See if we can remove some expired rule. + */ + remove_dyn_rule(rule, parent); + if (parent->count >= cmd->conn_limit) { + if (fw_verbose && last_log != time_second) { + last_log = time_second; + printf( + "drop session, too many entries\n"); + } + return 1; + } + } + add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); + } + break; + default: + printf("unknown dynamic rule type %u\n", cmd->o.opcode); + return 1; + } + lookup_dyn_rule(&args->f_id, NULL); /* XXX just set the lifetime */ + return 0; +} + +/* + * sends a reject message, consuming the mbuf passed as an argument. 
+ */ +static void +send_reject(struct mbuf *m, int code, int offset, int ip_len) +{ + if (code != ICMP_REJECT_RST) /* Send an ICMP unreach */ + icmp_error(m, ICMP_UNREACH, code, 0L, 0); + else { + /* XXX warning, this code writes into the mbuf */ + struct ip *ip = mtod(m, struct ip *); + struct tcphdr *const tcp = L3HDR(struct tcphdr, ip); + struct tcpiphdr ti, *const tip = (struct tcpiphdr *) ip; + int hlen = ip->ip_hl << 2; + + if (offset != 0 || (tcp->th_flags & TH_RST)) { + m_freem(m); /* free the mbuf */ + return; + } + ti.ti_i = *((struct ipovly *) ip); + ti.ti_t = *tcp; + bcopy(&ti, ip, sizeof(ti)); + tip->ti_seq = ntohl(tip->ti_seq); + tip->ti_ack = ntohl(tip->ti_ack); + tip->ti_len = ip_len - hlen - (tip->ti_off << 2); + if (tcp->th_flags & TH_ACK) { + tcp_respond(NULL, (void *)ip, tcp, m, + 0, tcp->th_ack, TH_RST); + } else { + if (tcp->th_flags & TH_SYN) + tip->ti_len++; + tcp_respond(NULL, (void *)ip, tcp, m, + tip->ti_seq + tip->ti_len, 0, TH_RST|TH_ACK); + } + } +} + +/** + * + * Given an ip_fw *, lookup_next_rule will return a pointer + * to the next rule, which can be either the jump + * target (for skipto instructions) or the next one in the list (in + * all other cases including a missing jump target). + * The result is also written in the "next_rule" field of the rule. + * Backward jumps are not allowed, so start looking from the next + * rule... + * + * This never returns NULL -- in case we do not have an exact match, + * the next rule is returned. When the ruleset is changed, + * pointers are flushed so we are always correct. 
+ */ + +static struct ip_fw * +lookup_next_rule(struct ip_fw *me) +{ + struct ip_fw *rule = NULL; + ipfw_insn *cmd; + + /* look for action, in case it is a skipto */ + cmd = ACTION_PTR(me); + if ( cmd->opcode == O_SKIPTO ) + for (rule = me->next; rule ; rule = rule->next) + if (rule->rulenum >= cmd->arg1) + break; + if (rule == NULL) /* failure or not a skipto */ + rule = me->next; + me->next_rule = rule; + return rule; +} + +/* + * The main check routine for the firewall. + * + * All arguments are in args so we can modify them and return them + * back to the caller. + * + * Parameters: + * + * args->m (in/out) The packet; we set to NULL when/if we nuke it. + * Starts with the IP header. + * args->eh (in) Mac header if present, or NULL for layer3 packet. + * args->oif Outgoing interface, or NULL if packet is incoming. + * The incoming interface is in the mbuf. (in) + * args->divert_rule (in/out) + * Skip up to the first rule past this rule number; + * upon return, non-zero port number for divert or tee. + * + * args->rule Pointer to the last matching rule (in/out) + * args->next_hop Socket we are forwarding to (out). + * args->f_id Addresses grabbed from the packet (out) + * + * Return value: + * + * IP_FW_PORT_DENY_FLAG the packet must be dropped. + * 0 The packet is to be accepted and routed normally OR + * the packet was denied/rejected and has been dropped; + * in the latter case, *m is equal to NULL upon return. + * port Divert the packet to port, with these caveats: + * + * - If IP_FW_PORT_TEE_FLAG is set, tee the packet instead + * of diverting it (ie, 'ipfw tee'). + * + * - If IP_FW_PORT_DYNT_FLAG is set, interpret the lower + * 16 bits as a dummynet pipe number instead of diverting + */ + +static int +ipfw_chk(struct ip_fw_args *args) +{ + /* + * Local variables hold state during the processing of a packet. 
+ * + * IMPORTANT NOTE: to speed up the processing of rules, there + * are some assumption on the values of the variables, which + * are documented here. Should you change them, please check + * the implementation of the various instructions to make sure + * that they still work. + */ + /* + * args->eh The MAC header. It is non-null for a layer2 + * packet, it is NULL for a layer-3 packet. + * + * m | args->m Pointer to the mbuf, as received from the caller. + * It may change if ipfw_chk() does an m_pullup, or if it + * consumes the packet because it calls send_reject(). + * XXX This has to change, so that ipfw_chk() never modifies + * or consumes the buffer. + * ip is simply an alias of the value of m, and it is kept + * in sync with it (the packet is supposed to start with + * the ip header). + */ + struct mbuf *m = args->m; + struct ip *ip = mtod(m, struct ip *); + + /* + * oif | args->oif If NULL, ipfw_chk has been called on the + * inbound path (ether_input, bdg_forward, ip_input). + * If non-NULL, ipfw_chk has been called on the outbound path + * (ether_output, ip_output). + */ + struct ifnet *oif = args->oif; + + struct ip_fw *f = NULL; /* matching rule */ + int retval = 0; + + /* + * hlen The length of the IPv4 header. + * hlen >0 means we have an IPv4 packet. + */ + u_int hlen = 0; /* hlen >0 means we have an IP pkt */ + + /* + * offset The offset of a fragment. offset != 0 means that + * we have a fragment at this offset of an IPv4 packet. + * offset == 0 means that (if this is an IPv4 packet) + * this is the first or only fragment. + */ + u_short offset = 0; + + /* + * Local copies of addresses. They are only valid if we have + * an IP packet. + * + * proto The protocol. Set to 0 for non-ip packets, + * or to the protocol read from the packet otherwise. + * proto != 0 means that we have an IPv4 packet. + * + * src_port, dst_port port numbers, in HOST format. Only + * valid for TCP and UDP packets. + * + * src_ip, dst_ip ip addresses, in NETWORK format. 
+ * Only valid for IPv4 packets. + */ + u_int8_t proto; + u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ + struct in_addr src_ip, dst_ip; /* NOTE: network format */ + u_int16_t ip_len=0; + int dyn_dir = MATCH_UNKNOWN; + ipfw_dyn_rule *q = NULL; + + /* + * dyn_dir = MATCH_UNKNOWN when rules unchecked, + * MATCH_NONE when checked and not matched (q = NULL), + * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) + */ + + if (args->eh == NULL || /* layer 3 packet */ + ( m->m_pkthdr.len >= sizeof(struct ip) && + ntohs(args->eh->ether_type) == ETHERTYPE_IP)) + hlen = ip->ip_hl << 2; + + /* + * Collect parameters into local variables for faster matching. + */ + if (hlen == 0) { /* do not grab addresses for non-ip pkts */ + proto = args->f_id.proto = 0; /* mark f_id invalid */ + goto after_ip_checks; + } + + proto = args->f_id.proto = ip->ip_p; + src_ip = ip->ip_src; + dst_ip = ip->ip_dst; + if (args->eh != NULL) { /* layer 2 packets are as on the wire */ + offset = ntohs(ip->ip_off) & IP_OFFMASK; + ip_len = ntohs(ip->ip_len); + } else { + offset = ip->ip_off & IP_OFFMASK; + ip_len = ip->ip_len; + } + +#define PULLUP_TO(len) \ + do { \ + if ((m)->m_len < (len)) { \ + args->m = m = m_pullup(m, (len)); \ + if (m == 0) \ + goto pullup_failed; \ + ip = mtod(m, struct ip *); \ + } \ + } while (0) + + if (offset == 0) { + switch (proto) { + case IPPROTO_TCP: + { + struct tcphdr *tcp; + + PULLUP_TO(hlen + sizeof(struct tcphdr)); + tcp = L3HDR(struct tcphdr, ip); + dst_port = tcp->th_dport; + src_port = tcp->th_sport; + args->f_id.flags = tcp->th_flags; + } + break; + + case IPPROTO_UDP: + { + struct udphdr *udp; + + PULLUP_TO(hlen + sizeof(struct udphdr)); + udp = L3HDR(struct udphdr, ip); + dst_port = udp->uh_dport; + src_port = udp->uh_sport; + } + break; + + case IPPROTO_ICMP: + PULLUP_TO(hlen + 4); /* type, code and checksum. 
*/ + args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; + break; + + default: + break; + } +#undef PULLUP_TO + } + + args->f_id.src_ip = ntohl(src_ip.s_addr); + args->f_id.dst_ip = ntohl(dst_ip.s_addr); + args->f_id.src_port = src_port = ntohs(src_port); + args->f_id.dst_port = dst_port = ntohs(dst_port); + +after_ip_checks: + if (args->rule) { + /* + * Packet has already been tagged. Look for the next rule + * to restart processing. + * + * If fw_one_pass != 0 then just accept it. + * XXX should not happen here, but optimized out in + * the caller. + */ + if (fw_one_pass) + return 0; + + f = args->rule->next_rule; + if (f == NULL) + f = lookup_next_rule(args->rule); + } else { + /* + * Find the starting rule. It can be either the first + * one, or the one after divert_rule if asked so. + */ + int skipto = args->divert_rule; + + f = layer3_chain; + if (args->eh == NULL && skipto != 0) { + if (skipto >= IPFW_DEFAULT_RULE) + return(IP_FW_PORT_DENY_FLAG); /* invalid */ + while (f && f->rulenum <= skipto) + f = f->next; + if (f == NULL) /* drop packet */ + return(IP_FW_PORT_DENY_FLAG); + } + } + args->divert_rule = 0; /* reset to avoid confusion later */ + + /* + * Now scan the rules, and parse microinstructions for each rule. + */ + for (; f; f = f->next) { + int l, cmdlen; + ipfw_insn *cmd; + int skip_or; /* skip rest of OR block */ + +again: + skip_or = 0; + for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; + l -= cmdlen, cmd += cmdlen) { + + /* + * check_body is a jump target used when we find a + * CHECK_STATE, and need to jump to the body of + * the target rule. + */ + +check_body: + cmdlen = F_LEN(cmd); + /* + * An OR block (insn_1 || .. || insn_n) has the + * F_OR bit set in all but the last instruction. + * The first match will set "skip_or", and cause + * the following instructions to be skipped until + * past the one with the F_OR bit clear. 
+ */ + if (skip_or) { /* skip this instruction */ + if ((cmd->len & F_OR) == 0) + skip_or = 0; /* next one is good */ + continue; + } + switch (cmd->opcode) { + case O_NOP: + goto cmd_match; /* That's easy */ + + case O_IPPRE: + case O_FORWARD_MAC: + printf("ipfw: opcode %d unimplemented\n", + cmd->opcode); + goto cmd_fail; + + case O_GID: + case O_UID: + /* + * We only check offset == 0 && proto != 0, + * as this ensures that we have an IPv4 + * packet with the ports info. + */ + if (offset!=0) + goto cmd_fail; + { + struct inpcbinfo *pi; + int wildcard; + struct inpcb *pcb; + + if (proto == IPPROTO_TCP) { + wildcard = 0; + pi = &tcbinfo; + } else if (proto == IPPROTO_UDP) { + wildcard = 1; + pi = &udbinfo; + } else + goto cmd_fail; + + pcb = (oif) ? + in_pcblookup_hash(pi, + dst_ip, htons(dst_port), + src_ip, htons(src_port), + wildcard, oif) : + in_pcblookup_hash(pi, + src_ip, htons(src_port), + dst_ip, htons(dst_port), + wildcard, NULL); + + if (pcb == NULL || pcb->inp_socket == NULL) + goto cmd_fail; + if (cmd->opcode == O_UID) { +#if __FreeBSD_version >= 500034 + if (socheckuid(pcb->inp_socket, + (uid_t)((ipfw_insn_u32 *)cmd)->d[0] + )) +#else + if (pcb->inp_socket->so_cred->cr_uid != + (uid_t)((ipfw_insn_u32 *)cmd)->d[0]) +#endif + goto cmd_match; + } else { + if (groupmember( + (uid_t)((ipfw_insn_u32 *)cmd)->d[0], + pcb->inp_socket->so_cred)) + goto cmd_match; + } + } + goto cmd_fail; + + case O_RECV: + if (iface_match(m->m_pkthdr.rcvif, + (ipfw_insn_if *)cmd)) + goto cmd_match; + goto cmd_fail; + + case O_XMIT: + if (iface_match(oif, (ipfw_insn_if *)cmd)) + goto cmd_match; + goto cmd_fail; + + case O_VIA: + if (iface_match(oif ? 
oif : m->m_pkthdr.rcvif, + (ipfw_insn_if *)cmd)) + goto cmd_match; + goto cmd_fail; + + case O_MACADDR2: + if (args->eh != NULL) { /* have MAC header */ + u_int32_t *want = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->addr; + u_int32_t *mask = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->mask; + u_int32_t *hdr = (u_int32_t *)args->eh; + + if ( want[0] == (hdr[0] & mask[0]) && + want[1] == (hdr[1] & mask[1]) && + want[2] == (hdr[2] & mask[2]) ) + goto cmd_match; + } + goto cmd_fail; + + case O_MAC_TYPE: + if (args->eh != NULL) { + u_int16_t type = + ntohs(args->eh->ether_type); + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; i>0; i--) + if (type>=p[0] && type<=p[1]) + goto cmd_match; + else + p += 2; + } + goto cmd_fail; + + case O_FRAG: + /* XXX check this -- MF bit ? */ + if (hlen == 0 || offset != 0) + goto cmd_fail; + goto cmd_match; + + case O_IN: /* "out" is "not in" */ + if (oif != NULL) + goto cmd_fail; + goto cmd_match; + + case O_LAYER2: + if (args->eh == NULL) + goto cmd_fail; + goto cmd_match; + + case O_PROTO: + /* + * We do not allow an arg of 0 so the + * check of "proto" only suffices. + */ + if (proto == cmd->arg1) + goto cmd_match; + goto cmd_fail; + + case O_IP_SRC: + if (hlen > 0 && + ((ipfw_insn_ip *)cmd)->addr.s_addr == + src_ip.s_addr) + goto cmd_match; + goto cmd_fail; + + case O_IP_SRC_MASK: + if (hlen > 0 && + ((ipfw_insn_ip *)cmd)->addr.s_addr == + (src_ip.s_addr & + ((ipfw_insn_ip *)cmd)->mask.s_addr)) + goto cmd_match; + goto cmd_fail; + + case O_IP_SRC_ME: + if (hlen == 0) + goto cmd_fail; + { + struct ifnet *tif; + + + INADDR_TO_IFP(src_ip, tif); + if (tif != NULL) + goto cmd_match; + } + goto cmd_fail; + + case O_IP_DST_SET: + case O_IP_SRC_SET: + if (hlen == 0) + goto cmd_fail; + { + u_int32_t *d = (u_int32_t *)(cmd+1); + u_int32_t a = + cmd->opcode == O_IP_DST_SET ? 
+ args->f_id.src_ip : args->f_id.dst_ip; + + if (a < d[0]) + goto cmd_fail; + a -= d[0]; + if (a >= cmd->arg1) + goto cmd_fail; + if (d[ 1 + (a>>5)] & (1<<(a & 0x1f)) ) + goto cmd_match; + } + goto cmd_fail; + + case O_IP_DST: + if (hlen > 0 && + ((ipfw_insn_ip *)cmd)->addr.s_addr == + dst_ip.s_addr) + goto cmd_match; + goto cmd_fail; + + case O_IP_DST_MASK: + if (hlen == 0) + goto cmd_fail; + if (((ipfw_insn_ip *)cmd)->addr.s_addr == + (dst_ip.s_addr & + ((ipfw_insn_ip *)cmd)->mask.s_addr)) + goto cmd_match; + goto cmd_fail; + + case O_IP_DST_ME: + if (hlen == 0) + goto cmd_fail; + { + struct ifnet *tif; + INADDR_TO_IFP(dst_ip, tif); + if (tif != NULL) + goto cmd_match; + } + goto cmd_fail; + + case O_IP_SRCPORT: + case O_IP_DSTPORT: + /* + * offset == 0 && proto != 0 is enough + * to guarantee that we have an IPv4 + * packet with port info. + */ + if (offset != 0) + goto cmd_fail; + if (proto==IPPROTO_UDP || + proto==IPPROTO_TCP) { + u_int16_t port = + (cmd->opcode == O_IP_SRCPORT) ? + src_port : dst_port ; + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; i>0; i--) + if (port>=p[0] && port<=p[1]) + goto cmd_match; + else + p += 2; + } + goto cmd_fail; + + case O_ICMPTYPE: + if (offset > 0 || + proto != IPPROTO_ICMP || + !icmptype_match(ip, (ipfw_insn_u32 *)cmd) ) + goto cmd_fail; + goto cmd_match; + + case O_IPOPT: + if (hlen == 0 || + !ipopts_match(ip, cmd) ) + goto cmd_fail; + goto cmd_match; + + case O_IPVER: + if (hlen == 0 || cmd->arg1 != ip->ip_v) + goto cmd_fail; + goto cmd_match; + + case O_IPTTL: + if (hlen == 0 || cmd->arg1 != ip->ip_ttl) + goto cmd_fail; + goto cmd_match; + + case O_IPID: + if (hlen == 0 || cmd->arg1 != ntohs(ip->ip_id)) + goto cmd_fail; + goto cmd_match; + + case O_IPLEN: + if (hlen == 0 || cmd->arg1 != ip_len) + goto cmd_fail; + goto cmd_match; + + case O_IPTOS: + if (hlen == 0 || + !flags_match(cmd, ip->ip_tos)) + goto cmd_fail; + goto cmd_match; + + case O_TCPFLAGS: + if (proto != IPPROTO_TCP || + 
offset > 0 || + !flags_match(cmd, + L3HDR(struct tcphdr,ip)->th_flags)) + goto cmd_fail; + goto cmd_match; + + case O_TCPOPTS: + if (proto != IPPROTO_TCP || + offset > 0 || + !tcpopts_match(ip, cmd)) + goto cmd_fail; + goto cmd_match; + + case O_TCPSEQ: + if (proto != IPPROTO_TCP || offset > 0 || + ((ipfw_insn_u32 *)cmd)->d[0] != + L3HDR(struct tcphdr,ip)->th_seq) + goto cmd_fail; + goto cmd_match; + + case O_TCPACK: + if (proto != IPPROTO_TCP || offset > 0 || + ((ipfw_insn_u32 *)cmd)->d[0] != + L3HDR(struct tcphdr,ip)->th_ack) + goto cmd_fail; + goto cmd_match; + + case O_TCPWIN: + if (proto != IPPROTO_TCP || offset > 0 || + cmd->arg1 != + L3HDR(struct tcphdr,ip)->th_win) + goto cmd_fail; + goto cmd_match; + + case O_ESTAB: + if (proto != IPPROTO_TCP || offset > 0) + goto cmd_fail; + + /* reject packets which have SYN only */ + if ((L3HDR(struct tcphdr,ip)->th_flags & + (TH_RST | TH_ACK | TH_SYN)) == TH_SYN) + goto cmd_fail; + goto cmd_match; + + case O_LOG: + ipfw_log(f, hlen, args->eh, m, oif); + goto cmd_match; + + case O_PROB: /* XXX check */ + if (random() < ((ipfw_insn_u32 *)cmd)->d[0] ) + goto cmd_match; + goto cmd_fail; + + case O_LIMIT: + case O_KEEP_STATE: + if (install_state(f, + (ipfw_insn_limit *)cmd, args)) + goto deny; /* error/limit violation */ + goto cmd_match; + + case O_PROBE_STATE: + case O_CHECK_STATE: + /* + * dynamic rules are checked at the first + * keep-state or check-state occurrence. + * The compiler introduces a probe-state + * instruction for us when we have a + * keep-state (because probe-state needs + * to be run first). 
+ */ + if (dyn_dir == MATCH_UNKNOWN) { + q = lookup_dyn_rule(&args->f_id, + &dyn_dir); + if (q != NULL) { + f = q->rule; + q->pcnt++; + q->bcnt += ip_len; + /* go to ACTION */ + cmd = ACTION_PTR(f); + l = f->cmd_len - f->act_ofs; + goto check_body; + } + } + if (cmd->opcode == O_CHECK_STATE) + goto next_rule; + else + goto cmd_match; + + case O_ACCEPT: + retval = 0; /* accept */ + goto accept; + + case O_PIPE: + case O_QUEUE: + args->rule = f; /* report matching rule */ + retval = cmd->arg1 | IP_FW_PORT_DYNT_FLAG; + goto accept; + + case O_DIVERT: + case O_TEE: + if (args->eh) /* not on layer 2 */ + goto cmd_fail; + args->divert_rule = f->rulenum; + if (cmd->opcode == O_DIVERT) + retval = cmd->arg1; + else + retval = cmd->arg1|IP_FW_PORT_TEE_FLAG; + goto accept; + + case O_COUNT: + case O_SKIPTO: + f->pcnt++; /* update stats */ + f->bcnt += ip_len; + f->timestamp = time_second; + if (cmd->opcode == O_COUNT) + goto next_rule; + /* handle skipto */ + if (f->next_rule == NULL) + lookup_next_rule(f); + f = f->next_rule; + goto again; + + case O_REJECT: + /* + * Drop the packet and send a reject notice + * if the packet is not ICMP (or is an ICMP + * query), and it is not multicast/broadcast. 
+ */ + if (hlen > 0 && + (proto != IPPROTO_ICMP || + is_icmp_query(ip)) && + !(m->m_flags & (M_BCAST|M_MCAST)) && + !IN_MULTICAST(dst_ip.s_addr)) { + send_reject(m,cmd->arg1,offset,ip_len); + args->m = m = NULL; + } + goto deny; + + case O_FORWARD_IP: + if (args->eh) /* not valid on layer2 pkts */ + goto cmd_fail; + if (!q || dyn_dir == MATCH_FORWARD) + args->next_hop = + &((ipfw_insn_sa *)cmd)->sa; + retval = 0; + goto accept; + + case O_DENY: + goto deny; + + default: + panic("-- unknown opcode %d\n", cmd->opcode); + } + panic("ipfw_chk: end of inner loop"); + + /* + * This code is a bit spaghetti, but we have + * 4 cases to handle: + * INSN FAIL, no F_NOT --> insn_fail + * INSN FAIL, but we have F_NOT --> cmd_success + * INSN MATCH, no F_NOT --> cmd_success + * INSN MATCH, but we have F_NOT --> insn_fail + * + * after this: + * cmd_success, F_OR --> set skip_or + * cmd_success, not F_OR --> try next insn + * insn_fail, F_OR --> try next insn + * insn_fail, not F_OR --> rule does not match + */ +cmd_fail: + if (cmd->len & F_NOT) /* NOT fail is a success */ + goto cmd_success; + else + goto insn_fail; + +cmd_match: + if (cmd->len & F_NOT) { /* NOT match is a failure. 
*/ +insn_fail: + if (cmd->len & F_OR) /* If an or block */ + continue; /* try next insn */ + else + break; /* otherwise next rule */ + } + +cmd_success: + if (cmd->len & F_OR) + skip_or = 1; + } /* end of inner for, scan opcodes */ + +next_rule: /* try next rule */ + + } /* end of outer for, scan rules */ + +deny: + retval = IP_FW_PORT_DENY_FLAG; + +accept: + /* Update statistics */ + f->pcnt++; + f->bcnt += ip_len; + f->timestamp = time_second; + return retval; + +pullup_failed: + if (fw_verbose) + printf("pullup failed\n"); + return(IP_FW_PORT_DENY_FLAG); +} + +#if 0 /* XXX old instructions not implemented yet XXX */ +bogusfrag: + if (fw_verbose) { + if (*m != NULL) + ipfw_report(NULL, ip, ip_off, ip_len, (*m)->m_pkthdr.rcvif, oif); + } + return(IP_FW_PORT_DENY_FLAG); + + if (f->fw_ipflg & IP_FW_IF_IPPRE && + (f->fw_iptos & 0xe0) != (ip->ip_tos & 0xe0)) + continue; + +#endif /* XXX old instructions not implemented yet */ + +/* + * When a rule is added/deleted, clear the next_rule pointers in all rules. + * These will be reconstructed on the fly as packets are matched. + * Must be called at splimp(). + */ +static void +flush_rule_ptrs(void) +{ + struct ip_fw *rule; + + for (rule = layer3_chain; rule; rule = rule->next) + rule->next_rule = NULL; +} + +/* + * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given + * pipe/queue, or to all of them (match == NULL). + * Must be called at splimp(). + */ +void +flush_pipe_ptrs(struct dn_flow_set *match) +{ + struct ip_fw *rule; + + for (rule = layer3_chain; rule; rule = rule->next) { + ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule); + + if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE) + continue; + + if (match == NULL || cmd->pipe_ptr == match) + cmd->pipe_ptr = NULL; + } +} + +/* + * Add a new rule to the list. Copy the rule into a malloc'ed area, then + * possibly create a rule number and add the rule to the list. 
+ * Update the rule_number in the input struct so the caller knows it as well. + */ +static int +add_rule(struct ip_fw **head, struct ip_fw *input_rule) +{ + struct ip_fw *rule, *f, *prev; + int s; + int l = RULESIZE(input_rule); + + if (*head == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) + return (EINVAL); + + rule = malloc(l, M_IPFW, M_DONTWAIT | M_ZERO); + if (rule == NULL) + return (ENOSPC); + + bcopy(input_rule, rule, l); + + rule->next = NULL; + rule->next_rule = NULL; + + rule->pcnt = 0; + rule->bcnt = 0; + rule->timestamp = 0; + + s = splimp(); + + if (*head == NULL) { /* default rule */ + *head = rule; + goto done; + } + + /* + * If rulenum is 0, find highest numbered rule before the + * default rule, and add autoinc_step + */ + if (autoinc_step < 1) + autoinc_step = 1; + else if (autoinc_step > 1000) + autoinc_step = 1000; + if (rule->rulenum == 0) { + /* + * locate the highest numbered rule before default + */ + for (f = *head; f; f = f->next) { + if (f->rulenum == IPFW_DEFAULT_RULE) + break; + rule->rulenum = f->rulenum; + } + if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) + rule->rulenum += autoinc_step; + input_rule->rulenum = rule->rulenum; + } + + /* + * Now insert the new rule in the right place in the sorted list. + */ + for (prev = NULL, f = *head; f; prev = f, f = f->next) { + if (f->rulenum > rule->rulenum) { /* found the location */ + if (prev) { + rule->next = f; + prev->next = rule; + } else { /* head insert */ + rule->next = *head; + *head = rule; + } + break; + } + } + flush_rule_ptrs(); +done: + static_count++; + static_len += l; + splx(s); + DEB(printf("++ installed rule %d, static count now %d\n", + rule->rulenum, static_count);) + return (0); +} + +/** + * Free storage associated with a static rule (including derived + * dynamic rules). + * The caller is in charge of clearing rule pointers to avoid + * dangling pointers. + * @return a pointer to the next entry. + * Arguments are not checked, so they better be correct. 
+ * Must be called at splimp(). + */ +static struct ip_fw * +delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule) +{ + struct ip_fw *n; + int l = RULESIZE(rule); + + n = rule->next; + remove_dyn_rule(rule, NULL /* force removal */); + if (prev == NULL) + *head = n; + else + prev->next = n; + static_count--; + static_len -= l; + + if (DUMMYNET_LOADED) + ip_dn_ruledel_ptr(rule); + free(rule, M_IPFW); + return n; +} + +/* + * Deletes all rules from a chain (including the default rule + * if the second argument is set). + * Must be called at splimp(). + */ +static void +free_chain(struct ip_fw **chain, int kill_default) +{ + struct ip_fw *rule; + + flush_rule_ptrs(); /* more efficient to do outside the loop */ + + while ( (rule = *chain) != NULL && + (kill_default || rule->rulenum != IPFW_DEFAULT_RULE) ) + delete_rule(chain, NULL, rule); +} + +/** + * Remove all rules with given number. + */ +static int +del_entry(struct ip_fw **chain, u_short rulenum) +{ + struct ip_fw *prev, *rule; + int s; + + if (rulenum == IPFW_DEFAULT_RULE) + return EINVAL; + + /* + * locate first rule to delete + */ + for (prev = NULL, rule = *chain; rule && rule->rulenum < rulenum; + prev = rule, rule = rule->next) + ; + if (rule->rulenum != rulenum) + return EINVAL; + + s = splimp(); /* no access to rules while removing */ + flush_rule_ptrs(); /* more efficient to do outside the loop */ + /* + * prev remains the same throughout the cycle + */ + while (rule && rule->rulenum == rulenum) + rule = delete_rule(chain, prev, rule); + splx(s); + return 0; +} + +/* + * Clear counters for a specific rule. + */ +static void +clear_counters(struct ip_fw *rule, int log_only) +{ + ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + + if (log_only == 0) { + rule->bcnt = rule->pcnt = 0; + rule->timestamp = 0; + } + if (l->o.opcode == O_LOG) + l->log_left = l->max_log; +} + +/** + * Reset some or all counters on firewall rules. 
+ * @arg frwl is null to clear all entries, or contains a specific + * rule number. + * @arg log_only is 1 if we only want to reset logs, zero otherwise. + */ +static int +zero_entry(int rulenum, int log_only) +{ + struct ip_fw *rule; + int s; + char *msg; + + if (rulenum == 0) { + s = splimp(); + norule_counter = 0; + for (rule = layer3_chain; rule; rule = rule->next) + clear_counters(rule, log_only); + splx(s); + msg = log_only ? "ipfw: All logging counts reset.\n" : + "ipfw: Accounting cleared.\n"; + } else { + int cleared = 0; + /* + * We can have multiple rules with the same number, so we + * need to clear them all. + */ + for (rule = layer3_chain; rule; rule = rule->next) + if (rule->rulenum == rulenum) { + s = splimp(); + while (rule && rule->rulenum == rulenum) { + clear_counters(rule, log_only); + rule = rule->next; + } + splx(s); + cleared = 1; + break; + } + if (!cleared) /* we did not find any matching rules */ + return (EINVAL); + msg = log_only ? "ipfw: Entry %d logging count reset.\n" : + "ipfw: Entry %d cleared.\n"; + } + if (fw_verbose) + log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); + return (0); +} + +/* + * Check validity of the structure before insert. + * Fortunately rules are simple, so this mostly need to check rule sizes. + */ +static int +check_ipfw_struct(struct ip_fw *rule, int size) +{ + int l, cmdlen = 0; + int have_action=0; + ipfw_insn *cmd; + + if (size < sizeof(*rule)) { + printf("kipfw: rule too short\n"); + return (EINVAL); + } + /* first, check for valid size */ + l = RULESIZE(rule); + if (l != size) { + printf("kipfw: size mismatch (have %d want %d)\n", size, l); + return (EINVAL); + } + /* + * Now go for the individual checks. Very simple ones, basically only + * instruction sizes. 
+ */ + for (l = rule->cmd_len, cmd = rule->cmd ; + l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + if (cmdlen > l) { + printf("kipfw: opcode %d size truncated\n", + cmd->opcode); + return EINVAL; + } + DEB(printf("kipfw: opcode %d\n", cmd->opcode);) + switch (cmd->opcode) { + case O_NOP: + case O_PROBE_STATE: + case O_KEEP_STATE: + case O_PROTO: + case O_IP_SRC_ME: + case O_IP_DST_ME: + case O_LAYER2: + case O_IN: + case O_FRAG: + case O_IPOPT: + case O_IPLEN: + case O_IPID: + case O_IPPRE: + case O_IPTOS: + case O_IPTTL: + case O_IPVER: + case O_TCPWIN: + case O_TCPFLAGS: + case O_TCPOPTS: + case O_ESTAB: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + break; + + case O_UID: + case O_GID: + case O_IP_SRC: + case O_IP_DST: + case O_TCPSEQ: + case O_TCPACK: + case O_PROB: + case O_ICMPTYPE: + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + break; + + case O_LIMIT: + if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) + goto bad_size; + break; + + case O_LOG: + if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) + goto bad_size; + + ((ipfw_insn_log *)cmd)->log_left = + ((ipfw_insn_log *)cmd)->max_log; + + break; + + case O_IP_SRC_MASK: + case O_IP_DST_MASK: + if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) + goto bad_size; + if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { + printf("kipfw: opcode %d, useless rule\n", + cmd->opcode); + return EINVAL; + } + break; + + case O_IP_SRC_SET: + case O_IP_DST_SET: + if (cmd->arg1 == 0 || cmd->arg1 > 256) { + printf("kipfw: invalid set size %d\n", + cmd->arg1); + return EINVAL; + } + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + + (cmd->arg1+31)/32 ) + goto bad_size; + break; + + case O_MACADDR2: + if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) + goto bad_size; + break; + + case O_MAC_TYPE: + case O_IP_SRCPORT: + case O_IP_DSTPORT: /* XXX artificial limit, 15 port pairs */ + if (cmdlen < 2 || cmdlen > 15) + goto bad_size; + break; + + case O_RECV: + case O_XMIT: + case O_VIA: + if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) + 
goto bad_size; + break; + + case O_PIPE: + case O_QUEUE: + if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) + goto bad_size; + goto check_action; + + case O_FORWARD_IP: /* XXX no! */ + if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) + goto bad_size; + goto check_action; + + case O_FORWARD_MAC: /* XXX no! */ + case O_CHECK_STATE: + case O_COUNT: + case O_ACCEPT: + case O_DENY: + case O_REJECT: + case O_SKIPTO: + case O_DIVERT: + case O_TEE: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; +check_action: + if (have_action) { + printf("kipfw: opcode %d, multiple actions" + " not allowed\n", + cmd->opcode); + return EINVAL; + } + have_action = 1; + if (l != cmdlen) { + printf("kipfw: opcode %d, action must be" + " last opcode\n", + cmd->opcode); + return EINVAL; + } + break; + default: + printf("kipfw: opcode %d, unknown opcode\n", + cmd->opcode); + return EINVAL; + } + } + if (have_action == 0) { + printf("kipfw: missing action\n"); + return EINVAL; + } + return 0; + +bad_size: + printf("kipfw: opcode %d size %d wrong\n", + cmd->opcode, cmdlen); + return EINVAL; +} + + +/** + * {set|get}sockopt parser. + */ +static int +ipfw_ctl(struct sockopt *sopt) +{ + int error, s, rulenum; + size_t size; + struct ip_fw *bp , *buf, *rule; + + static u_int32_t rule_buf[255]; /* we copy the data here */ + + /* + * Disallow modifications in really-really secure mode, but still allow + * the logging counters to be reset. + */ + if (sopt->sopt_name == IP_FW_ADD || + (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { +#if __FreeBSD_version >= 500034 + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error) + return (error); +#else /* FreeBSD 4.x */ + if (securelevel >= 3) + return (EPERM); +#endif + } + + error = 0; + + switch (sopt->sopt_name) { + case IP_FW_GET: + /* + * pass up a copy of the current rules. Static rules + * come first (the last of which has number IPFW_DEFAULT_RULE), + * followed by a possibly empty list of dynamic rule. 
+ * The last dynamic rule has NULL in the "next" field. + */ + s = splimp(); + size = static_len; /* size of static rules */ + if (ipfw_dyn_v) /* add size of dyn.rules */ + size += (dyn_count * sizeof(ipfw_dyn_rule)); + + /* + * XXX todo: if the user passes a short length just to know + * how much room is needed, do not bother filling up the + * buffer, just jump to the sooptcopyout. + */ + buf = malloc(size, M_TEMP, M_WAITOK); + if (buf == 0) { + splx(s); + error = ENOBUFS; + break; + } + + bp = buf; + for (rule = layer3_chain; rule ; rule = rule->next) { + int i = RULESIZE(rule); + bcopy(rule, bp, i); + bp = (struct ip_fw *)((char *)bp + i); + } + if (ipfw_dyn_v) { + int i; + ipfw_dyn_rule *p, *dst, *last = NULL; + + dst = (ipfw_dyn_rule *)bp; + for (i = 0 ; i < curr_dyn_buckets ; i++ ) + for ( p = ipfw_dyn_v[i] ; p != NULL ; + p = p->next, dst++ ) { + bcopy(p, dst, sizeof *p); + (int)dst->rule = p->rule->rulenum ; + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + dst->next = dst ; + last = dst ; + dst->expire = + TIME_LEQ(dst->expire, time_second) ? + 0 : dst->expire - time_second ; + } + if (last != NULL) /* mark last dynamic rule */ + last->next = NULL; + } + splx(s); + + error = sooptcopyout(sopt, buf, size); + free(buf, M_TEMP); + break; + + case IP_FW_FLUSH: + /* + * Normally we cannot release the lock on each iteration. + * We could do it here only because we start from the head all + * the times so there is no risk of missing some entries. + * On the other hand, the risk is that we end up with + * a very inconsistent ruleset, so better keep the lock + * around the whole cycle. + * + * XXX this code can be improved by resetting the head of + * the list to point to the default rule, and then freeing + * the old list without the need for a lock. 
+ */ + + s = splimp(); + free_chain(&layer3_chain, 0 /* keep default rule */); + splx(s); + break; + + case IP_FW_ADD: + rule = (struct ip_fw *)rule_buf; /* XXX do a malloc */ + error = sooptcopyin(sopt, rule, sizeof(rule_buf), + sizeof(struct ip_fw) ); + size = sopt->sopt_valsize; + if (error || (error = check_ipfw_struct(rule, size))) + break; + + error = add_rule(&layer3_chain, rule); + size = RULESIZE(rule); + if (!error && sopt->sopt_dir == SOPT_GET) + error = sooptcopyout(sopt, rule, size); + break; + + case IP_FW_DEL: /* argument is an int, the rule number */ + error = sooptcopyin(sopt, &rulenum, sizeof(int), sizeof(int)); + if (error) + break; + if (rulenum == IPFW_DEFAULT_RULE) { + if (fw_debug) + printf("ipfw: can't delete rule %u\n", + (unsigned)IPFW_DEFAULT_RULE); + error = EINVAL; + } else + error = del_entry(&layer3_chain, rulenum); + break; + + case IP_FW_ZERO: + case IP_FW_RESETLOG: /* argument is an int, the rule number */ + rulenum=0; + + if (sopt->sopt_val != 0) { + error = sooptcopyin(sopt, &rulenum, + sizeof(int), sizeof(int)); + if (error) + break; + } + error = zero_entry(rulenum, sopt->sopt_name == IP_FW_RESETLOG); + break; + + default: + printf("ipfw_ctl invalid option %d\n", sopt->sopt_name); + error = EINVAL; + } + + return (error); +} + +/** + * dummynet needs a reference to the default rule, because rules can be + * deleted while packets hold a reference to them. When this happens, + * dummynet changes the reference to the default rule (it could well be a + * NULL pointer, but this way we do not need to check for the special + * case, plus here he have info on the default behaviour). 
+ */ +struct ip_fw *ip_fw_default_rule; + +static void +ipfw_init(void) +{ + struct ip_fw default_rule; + + ip_fw_chk_ptr = ipfw_chk; + ip_fw_ctl_ptr = ipfw_ctl; + layer3_chain = NULL; + + bzero(&default_rule, sizeof default_rule); + + default_rule.act_ofs = 0; + default_rule.rulenum = IPFW_DEFAULT_RULE; + default_rule.cmd_len = 1; + + default_rule.cmd[0].len = 1; + default_rule.cmd[0].opcode = +#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT + 1 ? O_ACCEPT : +#endif + O_DENY; + + add_rule(&layer3_chain, &default_rule); + + ip_fw_default_rule = layer3_chain; + printf("IP packet filtering initialized, divert %s, " + "rule-based forwarding %s, default to %s, logging ", +#ifdef IPDIVERT + "enabled", +#else + "disabled", +#endif + "enabled", + default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny"); + +#ifdef IPFIREWALL_VERBOSE + fw_verbose = 1; +#endif +#ifdef IPFIREWALL_VERBOSE_LIMIT + verbose_limit = IPFIREWALL_VERBOSE_LIMIT; +#endif + printf("logging "); + if (fw_verbose == 0) + printf("disabled\n"); + else if (verbose_limit == 0) + printf("unlimited\n"); + else + printf("limited to %d packets/entry by default\n", + verbose_limit); +} + +static int +ipfw_modevent(module_t mod, int type, void *unused) +{ + int s; + int err = 0; + + switch (type) { + case MOD_LOAD: + s = splimp(); + if (IPFW_LOADED) { + splx(s); + printf("IP firewall already loaded\n"); + err = EEXIST; + } else { + ipfw_init(); + splx(s); + } + break; + + case MOD_UNLOAD: +#if !defined(KLD_MODULE) + printf("ipfw statically compiled, cannot unload\n"); + err = EBUSY; +#else + s = splimp(); + ip_fw_chk_ptr = NULL; + ip_fw_ctl_ptr = NULL; + free_chain(&layer3_chain, 1 /* kill default rule */); + splx(s); + printf("IP firewall unloaded\n"); +#endif + break; + default: + break; + } + return err; +} + +static moduledata_t ipfwmod = { + "ipfw", + ipfw_modevent, + 0 +}; +DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(ipfw, 1); diff --git a/sys/netinet/libalias/alias_db.c 
b/sys/netinet/libalias/alias_db.c index 52384b3f6a36..f5b04056eaba 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2641,6 +2641,82 @@ PacketAliasCheckNewLink(void) #include #include
+#define NEW_IPFW 1	/* use new ipfw code */
+#ifdef NEW_IPFW
+/*
+ * Fill a simple instruction of length 1.
+ *
+ * Any F_NOT / F_OR bits already present in cmd->len are kept and merged
+ * with 'flags'; every other bit of the length field is replaced by 1.
+ */
+static void
+fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, u_int16_t arg)
+{
+	cmd->opcode = opcode;
+	cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | 1;
+	cmd->arg1 = arg;
+}
+
+/*
+ * Step past the instruction at 'cmd' (by F_LEN(cmd) instruction words)
+ * and zero the first word of the following slot, in case it has been
+ * clobbered before.  Returns the pointer to that slot.
+ */
+static ipfw_insn *
+next_cmd(ipfw_insn *cmd)
+{
+	cmd += F_LEN(cmd);
+	bzero(cmd, sizeof(*cmd));
+	return cmd;
+}
+
+/*
+ * Fill an address-match instruction with a single address.
+ *
+ * NOTE(review): the length is that of ipfw_insn_u32, not ipfw_insn_ip --
+ * presumably only the address word belongs to this instruction; confirm
+ * against the instruction layouts in ip_fw.h.
+ */
+static void
+fill_ip(ipfw_insn_ip *cmd, enum ipfw_opcodes opcode, u_int32_t addr)
+{
+	cmd->o.opcode = opcode;
+	cmd->o.len = F_INSN_SIZE(ipfw_insn_u32);
+	cmd->addr.s_addr = addr;
+}
+
+/*
+ * Fill a port-match instruction for exactly one port: both entries of
+ * the ports[] pair are set to 'port'.
+ */
+static void
+fill_one_port(ipfw_insn_u16 *cmd, enum ipfw_opcodes opcode, u_int16_t port)
+{
+	cmd->o.opcode = opcode;
+	cmd->o.len = F_INSN_SIZE(ipfw_insn_u16);
+	cmd->ports[0] = cmd->ports[1] = port;
+}
+
+/*
+ * Build, in 'buf', a rule of the form
+ *
+ *	<rulenum> <action> <proto> from <sa> <sp> to <da> <dp>
+ *
+ * buf, bufsize	destination buffer; all bufsize bytes are zeroed first.
+ *		No bounds check is done, so the buffer must be able to
+ *		hold a struct ip_fw followed by the six instructions
+ *		written here.
+ * rulenum	rule number stored in the rule.
+ * action	opcode of the final (action) instruction, e.g. O_ACCEPT.
+ * proto	protocol number given to the O_PROTO instruction.
+ * sa, da	source and destination addresses, stored as passed.
+ * sp, dp	source and destination ports, stored as passed.
+ *
+ * Returns the number of bytes from the start of buf to the end of the
+ * last instruction, which is the length the caller passes on to
+ * setsockopt(IP_FW_ADD).
+ */
+static int
+fill_rule(void *buf, int bufsize, int rulenum,
+    enum ipfw_opcodes action, int proto,
+    struct in_addr sa, u_int16_t sp, struct in_addr da, u_int16_t dp)
+{
+	struct ip_fw *rule = (struct ip_fw *)buf;
+	ipfw_insn *cmd = (ipfw_insn *)rule->cmd;
+
+	bzero(buf, bufsize);
+	rule->rulenum = rulenum;
+
+	fill_cmd(cmd, O_PROTO, 0, proto);
+	cmd = next_cmd(cmd);
+
+	fill_ip((ipfw_insn_ip *)cmd, O_IP_SRC, sa.s_addr);
+	cmd = next_cmd(cmd);
+
+	fill_one_port((ipfw_insn_u16 *)cmd, O_IP_SRCPORT, sp);
+	cmd = next_cmd(cmd);
+
+	fill_ip((ipfw_insn_ip *)cmd, O_IP_DST, da.s_addr);
+	cmd = next_cmd(cmd);
+
+	fill_one_port((ipfw_insn_u16 *)cmd, O_IP_DSTPORT, dp);
+	cmd = next_cmd(cmd);
+
+	/*
+	 * Everything so far is match code; the action starts here.  Record
+	 * its offset, in the same instruction-word units next_cmd() steps
+	 * by, instead of leaving the zero left behind by bzero().
+	 */
+	rule->act_ofs = cmd - (ipfw_insn *)rule->cmd;
+	/* Honour the caller's action rather than hard-coding O_ACCEPT. */
+	fill_cmd(cmd, action, 0, 0);
+	cmd = next_cmd(cmd);
+
+	/* Total length of the instruction stream, action included. */
+	rule->cmd_len = cmd - (ipfw_insn *)rule->cmd;
+
+	/* Byte pointers: arithmetic on void * is not standard C. */
+	return ((int)((char *)cmd - (char *)buf));
+}
+#endif /* NEW_IPFW */
+
static void ClearAllFWHoles(void); static int fireWallBaseNum; /* The first firewall entry free for our use */ @@ -2724,6 +2800,35 @@ PunchFWHole(struct alias_link *link) { /* Start next search at next position */ fireWallActiveNum = fwhole+1; +#ifdef NEW_IPFW + if (GetOriginalPort(link) != 0 && GetDestPort(link) != 0) { + /* + * generate two rules of the form + * + * add fwhole accept tcp from OAddr OPort to DAddr DPort + * add fwhole accept tcp from DAddr DPort to OAddr OPort + */ + u_int32_t rulebuf[255]; + int i; + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetOriginalAddress(link), ntohs(GetOriginalPort(link)), + GetDestAddress(link), ntohs(GetDestPort(link)) ); + r = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(1) setsockopt(IP_FW_ADD)"); + + i = fill_rule(rulebuf, sizeof(rulebuf), fwhole, + O_ACCEPT, IPPROTO_TCP, + GetDestAddress(link), ntohs(GetDestPort(link)), + GetOriginalAddress(link), ntohs(GetOriginalPort(link)) ); + r = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i); + if (r) + err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)"); + } +#else /* !NEW_IPFW old code to generate ipfw rule */ + /* Build generic part of the two rules */ rule.fw_number = fwhole; IP_FW_SETNSRCP(&rule, 1); /* Number of source ports. */ @@ -2759,6 +2864,7 @@ PunchFWHole(struct alias_link *link) { err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)"); #endif } +#endif /* !NEW_IPFW */ /* Indicate hole applied */ link->data.tcp->fwhole = fwhole; fw_setfield(fireWallField, fwhole); @@ -2770,6 +2876,10 @@ static void ClearFWHole(struct alias_link *link) { if (link->link_type == LINK_TCP) { int fwhole = link->data.tcp->fwhole; /* Where is the firewall hole?
*/ +#ifdef NEW_IPFW + while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &fwhole, sizeof fwhole)) + ; +#else /* !NEW_IPFW */ struct ip_fw rule; if (fwhole < 0) @@ -2779,7 +2889,9 @@ ClearFWHole(struct alias_link *link) { rule.fw_number = fwhole; while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule)) ; +#endif /* !NEW_IPFW */ fw_clrfield(fireWallField, fwhole); + link->data.tcp->fwhole = -1; } } @@ -2795,9 +2907,15 @@ ClearAllFWHoles(void) { memset(&rule, 0, sizeof rule); for (i = fireWallBaseNum; i < fireWallBaseNum + fireWallNumNums; i++) { +#ifdef NEW_IPFW + int r = i; + while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &r, sizeof r)) + ; +#else /* !NEW_IPFW */ rule.fw_number = i; while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule)) ; +#endif /* NEW_IPFW */ } memset(fireWallField, 0, fireWallNumNums); }