freebsd-skq/sys/netinet/tcp_syncache.h
andre fc2be30b21 Improve SYN cookies by encoding the MSS, WSCALE (window scaling) and SACK
information into the ISN (initial sequence number) without the additional
use of timestamp bits and switching to the very fast and cryptographically
strong SipHash-2-4 MAC hash algorithm to protect the SYN cookie against
forgeries.

The purpose of SYN cookies is to encode all necessary session state in
the 32 bits of our initial sequence number to avoid storing any information
locally in memory.  This is especially important when under heavy spoofed
SYN attacks where we would either run out of memory or the syncache would
fill with bogus connection attempts swamping out legitimate connections.

The original SYN cookies method only stored an indexed MSS values in the
cookie.  This isn't sufficient anymore and breaks down in the presence of
WSCALE information which is only exchanged during SYN and SYN-ACK.  If we
can't keep track of it then we may severely underestimate the available
send or receive window. This is compounded with large windows whose size
information on the TCP segment header is even lower numerically.  A number
of years back SYN cookies were extended to store the additional state in
the TCP timestamp fields, if available on a connection.  While timestamps
are common among the BSD, Linux and other *nix systems Windows never enabled
them by default and thus are not present for the vast majority of clients
seen on the Internet.

The common parameters used on TCP sessions have changed quite a bit since
SYN cookies very invented some 17 years ago.  Today we have a lot more
bandwidth available making the use window scaling almost mandatory.  Also
SACK has become standard making recovering from packet loss much more
efficient.

This change moves all necessary information into the ISS removing the need
for timestamps.  Both the MSS (16 bits) and send WSCALE (4 bits) are stored
in 3 bit indexed form together with a single bit for SACK.  While this is
significantly less than the original range, it is sufficient to encode all
common values with minimal rounding.

The MSS depends on the MTU of the path and with the dominance of ethernet
the main value seen is around 1460 bytes.  Encapsulations for DSL lines
and some other overheads reduce it by a few more bytes for many connections
seen.  Rounding down to the next lower value in some cases isn't a problem
as we send only slightly more packets for the same amount of data.

The send WSCALE index is bit more tricky as rounding down under-estimates
the available send space available towards the remote host, however a small
number values dominate and are carefully selected again.

The receive WSCALE isn't encoded at all but recalculated based on the local
receive socket buffer size when a valid SYN cookie returns.  A listen socket
buffer size is unlikely to change while active.

The index values for MSS and WSCALE are selected for minimal rounding errors
based on large traffic surveys.  These values have to be periodically
validated against newer traffic surveys adjusting the arrays tcp_sc_msstab[]
and tcp_sc_wstab[] if necessary.

In addition the hash MAC to protect the SYN cookies is changed from MD5
to SipHash-2-4, a much faster and cryptographically secure algorithm.

Reviewed by:	dwmalone
Tested by:	Fabian Keil <fk@fabiankeil.de>
2013-07-11 15:29:25 +00:00

138 lines
4.7 KiB
C

/*-
* Copyright (c) 1982, 1986, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
* $FreeBSD$
*/
#ifndef _NETINET_TCP_SYNCACHE_H_
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
#endif
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
void syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
struct syncache {
TAILQ_ENTRY(syncache) sc_hash;
struct in_conninfo sc_inc; /* addresses */
int sc_rxttime; /* retransmit time */
u_int16_t sc_rxmits; /* retransmit counter */
u_int32_t sc_tsreflect; /* timestamp to reflect */
u_int32_t sc_ts; /* our timestamp to send */
u_int32_t sc_tsoff; /* ts offset w/ syncookies */
u_int32_t sc_flowlabel; /* IPv6 flowlabel */
tcp_seq sc_irs; /* seq from peer */
tcp_seq sc_iss; /* our ISS */
struct mbuf *sc_ipopts; /* source route */
u_int16_t sc_peer_mss; /* peer's MSS */
u_int16_t sc_wnd; /* advertised window */
u_int8_t sc_ip_ttl; /* IPv4 TTL */
u_int8_t sc_ip_tos; /* IPv4 TOS */
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
struct toedev *sc_tod; /* entry added by this TOE */
void *sc_todctx; /* TOE driver context */
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
u_int32_t sc_spare[2]; /* UTO */
};
/*
* Flags for the sc_flags field.
*/
#define SCF_NOOPT 0x01 /* no TCP options */
#define SCF_WINSCALE 0x02 /* negotiated window scaling */
#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
/* MSS is implicit */
#define SCF_UNREACH 0x10 /* icmp unreachable received */
#define SCF_SIGNATURE 0x20 /* send MD5 digests */
#define SCF_SACK 0x80 /* send SACK option */
#define SCF_ECN 0x100 /* send ECN setup packet */
struct syncache_head {
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;
int sch_nextc;
u_int sch_length;
struct tcp_syncache *sch_sc;
};
#define SYNCOOKIE_SECRET_SIZE 16
#define SYNCOOKIE_LIFETIME 15 /* seconds */
struct syncookie_secret {
volatile u_int oddeven;
uint8_t key[2][SYNCOOKIE_SECRET_SIZE];
struct callout reseed;
u_int lifetime;
};
struct tcp_syncache {
struct syncache_head *hashbase;
uma_zone_t zone;
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
u_int cache_limit;
u_int rexmt_limit;
u_int hash_secret;
struct vnet *sch_vnet;
struct syncookie_secret secret;
};
/* Internal use for the syncookie functions. */
union syncookie {
uint8_t cookie;
struct {
uint8_t odd_even:1,
sack_ok:1,
wscale_idx:3,
mss_idx:3;
} flags;
};
#endif /* _KERNEL */
#endif /* !_NETINET_TCP_SYNCACHE_H_ */