diff --git a/contrib/pf/man/pf.conf.5 b/contrib/pf/man/pf.conf.5 index 67cb717540e7..98c3d0ea8789 100644 --- a/contrib/pf/man/pf.conf.5 +++ b/contrib/pf/man/pf.conf.5 @@ -28,7 +28,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd October 30, 2006 +.Dd June 10, 2008 .Dt PF.CONF 5 .Os .Sh NAME @@ -2059,6 +2059,13 @@ Changes the timeout values used for states created by this rule. For a list of all valid timeout names, see .Sx OPTIONS above. +.It Ar sloppy +Uses a sloppy TCP connection tracker that does not check sequence +numbers at all, which makes insertion and ICMP teardown attacks way +easier. +This is intended to be used in situations where one does not see all +packets of a connection, i.e. in asymmetric routing situations. +Cannot be used with modulate or synproxy state. .El .Pp Multiple options can be specified, separated by commas: @@ -2923,7 +2930,7 @@ tos = "tos" ( "lowdelay" | "throughput" | "reliability" | [ "0x" ] number ) state-opts = state-opt [ [ "," ] state-opts ] -state-opt = ( "max" number | "no-sync" | timeout | +state-opt = ( "max" number | "no-sync" | timeout | sloppy | "source-track" [ ( "rule" | "global" ) ] | "max-src-nodes" number | "max-src-states" number | "max-src-conn" number | diff --git a/contrib/pf/pfctl/parse.y b/contrib/pf/pfctl/parse.y index 9817e8e23785..c22a0b662e6f 100644 --- a/contrib/pf/pfctl/parse.y +++ b/contrib/pf/pfctl/parse.y @@ -128,7 +128,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK, PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN, PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, - PF_STATE_OPT_TIMEOUT }; + PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY }; enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE }; @@ -423,7 +423,7 @@ typedef struct { %token QUEUE PRIORITY QLIMIT RTABLE %token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE -%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH +%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY %token TAGGED TAG IFBOUND FLOATING STATEPOLICY ROUTE %token STRING %token PORTBINARY @@ -1891,6 +1891,14 @@ pfrule : action dir logquick interface route af proto fromto statelock = 1; r.rule_flag |= o->data.statelock; break; + case PF_STATE_OPT_SLOPPY: + if (r.rule_flag & PFRULE_STATESLOPPY) { + yyerror("state sloppy option: " + "multiple definitions"); + YYERROR; + } + r.rule_flag |= PFRULE_STATESLOPPY; + break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || @@ -3216,6 +3224,14 @@ state_opt_item : MAXIMUM number { $$->next = NULL; $$->tail = $$; } + | SLOPPY { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_SLOPPY; + $$->next = NULL; + $$->tail = $$; + } | STRING number { int i; @@ -4101,6 +4117,13 @@ filter_consistent(struct pf_rule *r, int anchor_call) yyerror("keep state on block rules doesn't make sense"); problems++; } + if (r->rule_flag & PFRULE_STATESLOPPY && + (r->keep_state == PF_STATE_MODULATE || + r->keep_state == PF_STATE_SYNPROXY)) { + yyerror("sloppy state matching cannot be used with " + "synproxy state or modulate state"); + problems++; + } return (-problems); } @@ -4969,6 +4992,7 @@ lookup(char *s) { "scrub", SCRUB}, { "set", SET}, { "skip", SKIP}, + { "sloppy", SLOPPY}, { "source-hash", SOURCEHASH}, { "source-track", SOURCETRACK}, { "state", STATE}, diff --git 
a/contrib/pf/pfctl/pf_print_state.c b/contrib/pf/pfctl/pf_print_state.c index b3a693a5986a..02a39b321567 100644 --- a/contrib/pf/pfctl/pf_print_state.c +++ b/contrib/pf/pfctl/pf_print_state.c @@ -294,6 +294,8 @@ print_state(struct pf_state *s, int opts) printf(", anchor %u", s->anchor.nr); if (s->rule.nr != -1) printf(", rule %u", s->rule.nr); + if (s->state_flags & PFSTATE_SLOPPY) + printf(", sloppy"); if (s->src_node != NULL) printf(", source-track"); if (s->nat_src_node != NULL) diff --git a/contrib/pf/pfctl/pfctl_parser.c b/contrib/pf/pfctl/pfctl_parser.c index 9f232bdb7832..c9b2e1189b68 100644 --- a/contrib/pf/pfctl/pfctl_parser.c +++ b/contrib/pf/pfctl/pfctl_parser.c @@ -873,6 +873,8 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) opts = 1; if (r->rule_flag & PFRULE_IFBOUND) opts = 1; + if (r->rule_flag & PFRULE_STATESLOPPY) + opts = 1; for (i = 0; !opts && i < PFTM_MAX; ++i) if (r->timeout[i]) opts = 1; @@ -939,6 +941,12 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) printf("if-bound"); opts = 0; } + if (r->rule_flag & PFRULE_STATESLOPPY) { + if (!opts) + printf(", "); + printf("sloppy"); + opts = 0; + } for (i = 0; i < PFTM_MAX; ++i) if (r->timeout[i]) { int j; diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index 0925edb094b9..946611859175 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -465,7 +465,7 @@ pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; - st->allow_opts = sp->allow_opts; + st->state_flags = sp->state_flags; bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; @@ -1578,7 +1578,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) sp->proto = st->proto; sp->direction = st->direction; sp->log = st->log; - sp->allow_opts = st->allow_opts; + sp->state_flags = st->state_flags; sp->timeout = st->timeout; if (flags & PFSYNC_FLAG_STALE) diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index f7edca3e4003..f306610fb3d5 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -80,7 +80,7 @@ struct pfsync_state { u_int8_t proto; u_int8_t direction; u_int8_t log; - u_int8_t allow_opts; + u_int8_t state_flags; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index e8031ac7dfec..ebe73c2e9320 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -253,6 +253,13 @@ int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); +int pf_tcp_track_full(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, u_short *, int *); +int pf_tcp_track_sloppy(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pf_pdesc *, u_short *); int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); @@ -3528,7 +3535,10 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); - s->allow_opts = r->allow_opts; + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; if (nr != NULL) s->log |= 
nr->log & PF_LOG_ALL; @@ -3925,7 +3935,10 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); - s->allow_opts = r->allow_opts; + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; @@ -4238,7 +4251,10 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); - s->allow_opts = r->allow_opts; + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; @@ -4525,7 +4541,10 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); - s->allow_opts = r->allow_opts; + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; @@ -4665,6 +4684,430 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, return (PF_PASS); } +int +pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, + struct pf_pdesc *pd, u_short *reason, int *copyback) +{ + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + orig_seq = seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. 
Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { +#ifdef __FreeBSD__ + while ((src->seqdiff = pf_new_isn(*state) - seq) == 0) + ; +#else + while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) + ; +#endif + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* Remove scale factor from initial + * window */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in an + * FIN|ACK if the SYN times out -- no sequence to ACK. + */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. 
+ */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + *copyback = 1; + } + + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws)) && + /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || + src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. 
+ * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. + */ + + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, +#ifdef __FreeBSD__ + ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1]); +#else + ackskew, (*state)->packets[0], + (*state)->packets[1]); +#endif + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. + */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) +#ifdef __FreeBSD__ + pf_send_tcp(m, (*state)->rule.ptr, pd->af, +#else + pf_send_tcp((*state)->rule.ptr, pd->af, +#endif + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, 0, + pd->eh, kif->pfik_ifp); + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " +#ifdef notyet + "pkts=%llu:%llu dir=%s,%s\n", +#else + "pkts=%llu:%llu%s\n", +#endif + seq, orig_seq, ack, pd->p_len, ackskew, +#ifdef __FreeBSD__ + (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], +#else + (*state)->packets[0], (*state)->packets[1], +#endif +#ifdef notyet + direction == PF_IN ? "in" : "out", + direction == (*state)->direction ? "fwd" : "rev"); +#else + ""); +#endif + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } + + /* Any packets which have gotten here are to be passed */ + return (PF_PASS); +} + +int +pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pf_pdesc *pd, u_short *reason) +{ + struct tcphdr *th = pd->hdr.tcp; + + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) { + dst->state = TCPS_FIN_WAIT_2; + } else if (src->state == TCPS_SYN_SENT && + dst->state < TCPS_SYN_SENT) { + /* + * Handle a special sloppy case where we only see one + * half of the connection. If there is a ACK after + * the initial SYN without ever seeing a packet from + * the destination, set the connection to established. + */ + dst->state = src->state = TCPS_ESTABLISHED; + if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (src->state == TCPS_CLOSING && + dst->state == TCPS_ESTABLISHED && + dst->seqlo == 0) { + /* + * Handle the closing of half connections where we + * don't see the full bidirectional FIN/ACK+ACK + * handshake. + */ + dst->state = TCPS_CLOSING; + } + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + return (PF_PASS); +} + + int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, @@ -4672,10 +5115,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, { struct pf_state_cmp key; struct tcphdr *th = pd->hdr.tcp; - u_int16_t win = ntohs(th->th_win); - u_int32_t ack, end, seq, orig_seq; - u_int8_t sws, dws; - int ackskew; int copyback = 0; struct pf_state_peer *src, *dst; @@ -4826,337 +5265,15 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { - sws = src->wscale & PF_WSCALE_MASK; - dws = dst->wscale & PF_WSCALE_MASK; - } else - sws = dws = 0; - - /* - * Sequence tracking algorithm from Guido van Rooij's paper: - * http://www.madison-gurkha.com/publications/tcp_filtering/ - * tcp_filtering.ps - */ - - orig_seq = seq = ntohl(th->th_seq); - if (src->seqlo == 0) { - /* First packet from this end. 
Set its state */ - - if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && - src->scrub == NULL) { - if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { - REASON_SET(reason, PFRES_MEMORY); - return (PF_DROP); - } - } - - /* Deferred generation of sequence number modulator */ - if (dst->seqdiff && !src->seqdiff) { -#ifdef __FreeBSD__ - while ((src->seqdiff = pf_new_isn(*state) - seq) == 0) - ; -#else - while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) - ; -#endif - ack = ntohl(th->th_ack) - dst->seqdiff; - pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + - src->seqdiff), 0); - pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); - copyback = 1; - } else { - ack = ntohl(th->th_ack); - } - - end = seq + pd->p_len; - if (th->th_flags & TH_SYN) { - end++; - if (dst->wscale & PF_WSCALE_FLAG) { - src->wscale = pf_get_wscale(m, off, th->th_off, - pd->af); - if (src->wscale & PF_WSCALE_FLAG) { - /* Remove scale factor from initial - * window */ - sws = src->wscale & PF_WSCALE_MASK; - win = ((u_int32_t)win + (1 << sws) - 1) - >> sws; - dws = dst->wscale & PF_WSCALE_MASK; - } else { - /* fixup other window */ - dst->max_win <<= dst->wscale & - PF_WSCALE_MASK; - /* in case of a retrans SYN|ACK */ - dst->wscale = 0; - } - } - } - if (th->th_flags & TH_FIN) - end++; - - src->seqlo = seq; - if (src->state < TCPS_SYN_SENT) - src->state = TCPS_SYN_SENT; - - /* - * May need to slide the window (seqhi may have been set by - * the crappy stack check or if we picked up the connection - * after establishment) - */ - if (src->seqhi == 1 || - SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) - src->seqhi = end + MAX(1, dst->max_win << dws); - if (win > src->max_win) - src->max_win = win; - + if ((*state)->state_flags & PFSTATE_SLOPPY) { + if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) + return (PF_DROP); } else { - ack = ntohl(th->th_ack) - dst->seqdiff; - if (src->seqdiff) { - /* Modulate sequence numbers */ - pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + - src->seqdiff), 0); - pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); - copyback = 1; - } - end = seq + pd->p_len; - if (th->th_flags & TH_SYN) - end++; - if (th->th_flags & TH_FIN) - end++; + if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, + ©back) == PF_DROP) + return (PF_DROP); } - if ((th->th_flags & TH_ACK) == 0) { - /* Let it pass through the ack skew check */ - ack = dst->seqlo; - } else if ((ack == 0 && - (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || - /* broken tcp stacks do not set ack */ - (dst->state < TCPS_SYN_SENT)) { - /* - * Many stacks (ours included) will set the ACK number in an - * FIN|ACK if the SYN times out -- no sequence to ACK. - */ - ack = dst->seqlo; - } - - if (seq == end) { - /* Ease sequencing restrictions on no data packets */ - seq = src->seqlo; - end = seq; - } - - ackskew = dst->seqlo - ack; - - - /* - * Need to demodulate the sequence numbers in any TCP SACK options - * (Selective ACK). We could optionally validate the SACK values - * against the current ACK window, either forwards or backwards, but - * I'm not confident that SACK has been implemented properly - * everywhere. It wouldn't surprise me if several stacks accidently - * SACK too far backwards of previously ACKed data. There really aren't - * any security implications of bad SACKing unless the target stack - * doesn't validate the option length correctly. Someone trying to - * spoof into a TCP connection won't bother blindly sending SACK - * options anyway. 
- */ - if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { - if (pf_modulate_sack(m, off, pd, th, dst)) - copyback = 1; - } - - -#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ - if (SEQ_GEQ(src->seqhi, end) && - /* Last octet inside other's window space */ - SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && - /* Retrans: not more than one window back */ - (ackskew >= -MAXACKWINDOW) && - /* Acking not more than one reassembled fragment backwards */ - (ackskew <= (MAXACKWINDOW << sws)) && - /* Acking not more than one window forward */ - ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { - /* Require an exact/+1 sequence match on resets when possible */ - - if (dst->scrub || src->scrub) { - if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - *state, src, dst, ©back)) - return (PF_DROP); - } - - /* update max window */ - if (src->max_win < win) - src->max_win = win; - /* synchronize sequencing */ - if (SEQ_GT(end, src->seqlo)) - src->seqlo = end; - /* slide the window of what the other end can send */ - if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) - dst->seqhi = ack + MAX((win << sws), 1); - - - /* update states */ - if (th->th_flags & TH_SYN) - if (src->state < TCPS_SYN_SENT) - src->state = TCPS_SYN_SENT; - if (th->th_flags & TH_FIN) - if (src->state < TCPS_CLOSING) - src->state = TCPS_CLOSING; - if (th->th_flags & TH_ACK) { - if (dst->state == TCPS_SYN_SENT) { - dst->state = TCPS_ESTABLISHED; - if (src->state == TCPS_ESTABLISHED && - (*state)->src_node != NULL && - pf_src_connlimit(state)) { - REASON_SET(reason, PFRES_SRCLIMIT); - return (PF_DROP); - } - } else if (dst->state == TCPS_CLOSING) - dst->state = TCPS_FIN_WAIT_2; - } - if (th->th_flags & TH_RST) - src->state = dst->state = TCPS_TIME_WAIT; - - /* update expire time */ - (*state)->expire = time_second; - if (src->state >= TCPS_FIN_WAIT_2 && - dst->state >= TCPS_FIN_WAIT_2) - (*state)->timeout = PFTM_TCP_CLOSED; - else if (src->state >= TCPS_CLOSING && - dst->state >= TCPS_CLOSING) - (*state)->timeout = PFTM_TCP_FIN_WAIT; - else if (src->state < TCPS_ESTABLISHED || - dst->state < TCPS_ESTABLISHED) - (*state)->timeout = PFTM_TCP_OPENING; - else if (src->state >= TCPS_CLOSING || - dst->state >= TCPS_CLOSING) - (*state)->timeout = PFTM_TCP_CLOSING; - else - (*state)->timeout = PFTM_TCP_ESTABLISHED; - - /* Fall through to PASS packet */ - - } else if ((dst->state < TCPS_SYN_SENT || - dst->state >= TCPS_FIN_WAIT_2 || - src->state >= TCPS_FIN_WAIT_2) && - SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && - /* Within a window forward of the originating packet */ - SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { - /* Within a window backward of the originating packet */ - - /* - * This currently handles three situations: - * 1) Stupid stacks will shotgun SYNs before their peer - * replies. - * 2) When PF catches an already established stream (the - * firewall rebooted, the state table was flushed, routes - * changed...) - * 3) Packets get funky immediately after the connection - * closes (this should catch Solaris spurious ACK|FINs - * that web servers like to spew after a close) - * - * This must be a little more careful than the above code - * since packet floods will also be caught here. We don't - * update the TTL here to mitigate the damage of a packet - * flood and so the same code can handle awkward establishment - * and a loosened connection close. 
- * In the establishment case, a correct peer response will - * validate the connection, go through the normal state code - * and keep updating the state TTL. - */ - - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: loose state match: "); - pf_print_state(*state); - pf_print_flags(th->th_flags); - printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " - "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, -#ifdef __FreeBSD__ - ackskew, (unsigned long long)(*state)->packets[0], - (unsigned long long)(*state)->packets[1]); -#else - ackskew, (*state)->packets[0], - (*state)->packets[1]); -#endif - } - - if (dst->scrub || src->scrub) { - if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - *state, src, dst, ©back)) - return (PF_DROP); - } - - /* update max window */ - if (src->max_win < win) - src->max_win = win; - /* synchronize sequencing */ - if (SEQ_GT(end, src->seqlo)) - src->seqlo = end; - /* slide the window of what the other end can send */ - if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) - dst->seqhi = ack + MAX((win << sws), 1); - - /* - * Cannot set dst->seqhi here since this could be a shotgunned - * SYN and not an already established connection. - */ - - if (th->th_flags & TH_FIN) - if (src->state < TCPS_CLOSING) - src->state = TCPS_CLOSING; - if (th->th_flags & TH_RST) - src->state = dst->state = TCPS_TIME_WAIT; - - /* Fall through to PASS packet */ - - } else { - if ((*state)->dst.state == TCPS_SYN_SENT && - (*state)->src.state == TCPS_SYN_SENT) { - /* Send RST for state mismatches during handshake */ - if (!(th->th_flags & TH_RST)) -#ifdef __FreeBSD__ - pf_send_tcp(m, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif - pd->dst, pd->src, th->th_dport, - th->th_sport, ntohl(th->th_ack), 0, - TH_RST, 0, 0, - (*state)->rule.ptr->return_ttl, 1, 0, - pd->eh, kif->pfik_ifp); - src->seqlo = 0; - src->seqhi = 1; - src->max_win = 1; - } else if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: BAD state: "); - pf_print_state(*state); - pf_print_flags(th->th_flags); - printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " - "pkts=%llu:%llu dir=%s,%s\n", - seq, orig_seq, ack, pd->p_len, ackskew, -#ifdef __FreeBSD__ - (unsigned long long)(*state)->packets[0], - (unsigned long long)(*state)->packets[1], -#else - (*state)->packets[0], (*state)->packets[1], -#endif - direction == PF_IN ? "in" : "out", - direction == (*state)->direction ? "fwd" : "rev"); - printf("pf: State failure on: %c %c %c %c | %c %c\n", - SEQ_GEQ(src->seqhi, end) ? ' ' : '1', - SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? - ' ': '2', - (ackskew >= -MAXACKWINDOW) ? ' ' : '3', - (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', - SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', - SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); - } - REASON_SET(reason, PFRES_BADSTATE); - return (PF_DROP); - } - - /* Any packets which have gotten here are to be passed */ - /* translate source/destination address, if necessary */ if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) @@ -5533,8 +5650,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, copyback = 1; } - if (!SEQ_GEQ(src->seqhi, seq) || - !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { + if (!((*state)->state_flags & PFSTATE_SLOPPY) && + (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); @@ -7052,7 +7170,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, done: if (action == PF_PASS && h->ip_hl > 5 && - !((s && s->allow_opts) || r->allow_opts)) { + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; @@ -7513,7 +7631,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, done: /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && - !((s && s->allow_opts) || r->allow_opts)) { + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index 32e721c5733f..de175b1a0213 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -700,6 +700,7 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ +#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -800,7 +801,9 @@ struct pf_state { u_int8_t pad; #endif u_int8_t log; - u_int8_t allow_opts; + u_int8_t state_flags; +#define PFSTATE_ALLOWOPTS 0x01 +#define PFSTATE_SLOPPY 0x02 u_int8_t timeout; u_int8_t sync_flags; #define PFSTATE_NOSYNC 0x01
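
For readers of this patch, a minimal usage sketch of the option it introduces (not part of the change itself): the sloppy tracker is enabled per rule through the state-opts grammar added to the pf.conf.5 hunk above, i.e. keep state (sloppy). The interface and macro names below ($ext_if, $asym_net) are hypothetical.

    # Asymmetric routing: this firewall sees only one direction of the TCP
    # flow, so full sequence-number tracking would reject the packets it
    # does see; track the connection sloppily instead.
    pass on $ext_if proto tcp from any to $asym_net keep state (sloppy)

Per the filter_consistent() check in the parse.y hunk, such a rule cannot also use modulate state or synproxy state, and the pf_print_state.c change marks the resulting entries with ", sloppy" in the state listing printed by pfctl.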