Revert r265338, r271089 and r271123 as those changes do not handle
non-inline urgent data and introduce an mbuf exhaustion attack vector
similar to FreeBSD-SA-15:15.tcp, but not requiring VNETs.

Address the issue described in FreeBSD-SA-15:15.tcp.

Reviewed by:	glebius
Approved by:	so
Approved by:	jmallett (mentor)
Security:	FreeBSD-SA-15:15.tcp
Sponsored by:	Norse Corp, Inc.
Patrick Kelsey 2015-07-29 17:59:13 +00:00
parent a80ac30b2d
commit 4741bfcb57
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=286027
6 changed files with 163 additions and 92 deletions
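
The core of the change is the per-connection cap that the revert restores in
tcp_reass(): an out-of-order segment is only queued while the number of queued
entries stays below the receive buffer high-water mark divided by the MSS, the
segment that fills the hole at rcv_nxt is always let through, and the queue
entries come from a globally capped UMA zone. Below is a minimal user-space
sketch of that limit check; struct conn, reass_queue_full() and every field
name are illustrative stand-ins, not the kernel code itself.

#include <stdbool.h>
#include <stdio.h>

struct conn {
	int	segqlen;	/* entries currently on the reassembly queue */
	int	maxseg;		/* negotiated maximum segment size */
	int	rcv_hiwat;	/* receive socket buffer high-water mark */
	bool	established;	/* stand-in for TCPS_HAVEESTABLISHED() */
};

/*
 * Return true when a new out-of-order segment should be dropped.  The
 * segment expected at rcv_nxt is never refused, so a connection cannot
 * wedge behind its own limit.
 */
static bool
reass_queue_full(struct conn *c, unsigned int seq, unsigned int rcv_nxt)
{
	if (seq == rcv_nxt && c->established)
		return (false);
	return (c->segqlen >= (c->rcv_hiwat / c->maxseg) + 1);
}

int
main(void)
{
	struct conn c = { .segqlen = 45, .maxseg = 1460,
	    .rcv_hiwat = 65536, .established = true };

	/* 65536 / 1460 + 1 == 45, so a 46th out-of-order segment is refused. */
	printf("drop: %d\n", reass_queue_full(&c, 1000, 500));
	return (0);
}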

View File

@@ -1665,7 +1665,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_nxt == tp->snd_max &&
tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
tp->t_segq == NULL && ((to.to_flags & TOF_TS) == 0 ||
LIST_EMPTY(&tp->t_segq) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
/*
@@ -2903,7 +2904,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* immediately when segments are out of order (so
* fast retransmit can work).
*/
if (th->th_seq == tp->rcv_nxt && tp->t_segq == NULL &&
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
TCPS_HAVEESTABLISHED(tp->t_state)) {
if (DELAY_ACK(tp, tlen))
tp->t_flags |= TF_DELACK;

View File

@@ -71,33 +71,80 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
static int tcp_reass_maxseg = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
&tcp_reass_maxseg, 0,
"Global maximum number of TCP Segments in Reassembly Queue");
static uma_zone_t tcp_reass_zone;
SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_VNET,
&tcp_reass_zone,
"Global number of TCP Segments currently in Reassembly Queue");
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
{
/* Set the zone limit and read back the effective value. */
tcp_reass_maxseg = nmbclusters / 16;
tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
tcp_reass_maxseg);
}
void
tcp_reass_global_init(void)
{
tcp_reass_maxseg = nmbclusters / 16;
TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
&tcp_reass_maxseg);
tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
/* Set the zone limit and read back the effective value. */
tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
tcp_reass_maxseg);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}
void
tcp_reass_flush(struct tcpcb *tp)
{
struct mbuf *m;
struct tseg_qent *qe;
INP_WLOCK_ASSERT(tp->t_inpcb);
while ((m = tp->t_segq) != NULL) {
tp->t_segq = m->m_nextpkt;
tp->t_segqlen -= m->m_pkthdr.len;
m_freem(m);
while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(qe, tqe_q);
m_freem(qe->tqe_m);
uma_zfree(tcp_reass_zone, qe);
tp->t_segqlen--;
}
KASSERT((tp->t_segqlen == 0),
("TCP reass queue %p length is %d instead of 0 after flush.",
("TCP reass queue %p segment count is %d instead of 0 after flush.",
tp, tp->t_segqlen));
}
#define M_TCPHDR(m) ((struct tcphdr *)((m)->m_pkthdr.pkt_tcphdr))
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
struct tseg_qent *q;
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
struct mbuf *mq, *mp;
int flags, wakeup;
char *s = NULL;
int flags;
struct tseg_qent tqs;
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -113,10 +160,6 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
if (th == NULL)
goto present;
M_ASSERTPKTHDR(m);
KASSERT(*tlenp == m->m_pkthdr.len, ("%s: tlenp %u len %u", __func__,
*tlenp, m->m_pkthdr.len));
/*
* Limit the number of segments that can be queued to reduce the
* potential for mbuf exhaustion. For best performance, we want to be
@@ -127,15 +170,17 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
* Always let the missing segment through which caused this queue.
* NB: Access to the socket buffer is left intentionally unlocked as we
* can tolerate stale information here.
*
* XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
* should work but causes packets to be dropped when they shouldn't.
* Investigate why and re-evaluate the below limit after the behaviour
* is understood.
*/
if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
tp->t_segqlen + m->m_pkthdr.len >= sbspace(&so->so_rcv)) {
char *s;
tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
TCPSTAT_INC(tcps_rcvreassfull);
*tlenp = 0;
if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
NULL))) {
if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: queue limit reached, "
"segment dropped\n", s, __func__);
free(s, M_TCPLOG);
@@ -144,14 +189,47 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
return (0);
}
/*
* Allocate a new queue entry. If we can't, or hit the zone limit
* just drop the pkt.
*
* Use a temporary structure on the stack for the missing segment
* when the zone is exhausted. Otherwise we may get stuck.
*/
te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
TCPSTAT_INC(tcps_rcvmemdrop);
m_freem(m);
*tlenp = 0;
if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
NULL))) {
log(LOG_DEBUG, "%s; %s: global zone limit "
"reached, segment dropped\n", s, __func__);
free(s, M_TCPLOG);
}
return (0);
} else {
bzero(&tqs, sizeof(struct tseg_qent));
te = &tqs;
if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
NULL))) {
log(LOG_DEBUG,
"%s; %s: global zone limit reached, using "
"stack for missing segment\n", s, __func__);
free(s, M_TCPLOG);
}
}
}
tp->t_segqlen++;
/*
* Find a segment which begins after this one does.
*/
mp = NULL;
for (mq = tp->t_segq; mq != NULL; mq = mq->m_nextpkt) {
if (SEQ_GT(M_TCPHDR(mq)->th_seq, th->th_seq))
LIST_FOREACH(q, &tp->t_segq, tqe_q) {
if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
break;
mp = mq;
p = q;
}
/*
@@ -159,16 +237,18 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
* our data already. If so, drop the data from the incoming
* segment. If it provides all of our data, drop us.
*/
if (mp != NULL) {
if (p != NULL) {
int i;
/* conversion to int (in i) handles seq wraparound */
i = M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len - th->th_seq;
i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
if (i > 0) {
if (i >= *tlenp) {
TCPSTAT_INC(tcps_rcvduppack);
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
m_freem(m);
if (te != &tqs)
uma_zfree(tcp_reass_zone, te);
tp->t_segqlen--;
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -190,54 +270,37 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
* While we overlap succeeding segments trim them or,
* if they are completely covered, dequeue them.
*/
while (mq) {
struct mbuf *nq;
int i;
i = (th->th_seq + *tlenp) - M_TCPHDR(mq)->th_seq;
while (q) {
int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
if (i <= 0)
break;
if (i < mq->m_pkthdr.len) {
M_TCPHDR(mq)->th_seq += i;
m_adj(mq, i);
tp->t_segqlen -= i;
if (i < q->tqe_len) {
q->tqe_th->th_seq += i;
q->tqe_len -= i;
m_adj(q->tqe_m, i);
break;
}
nq = mq->m_nextpkt;
tp->t_segqlen -= mq->m_pkthdr.len;
m_freem(mq);
if (mp)
mp->m_nextpkt = nq;
else
tp->t_segq = nq;
mq = nq;
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
}
/*
* Insert the new segment queue entry into place. Try to collapse
* mbuf chains if segments are adjacent.
*/
if (mp) {
if (M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len == th->th_seq)
m_catpkt(mp, m);
else {
m->m_nextpkt = mp->m_nextpkt;
mp->m_nextpkt = m;
m->m_pkthdr.pkt_tcphdr = th;
}
/* Insert the new segment queue entry into place. */
te->tqe_m = m;
te->tqe_th = th;
te->tqe_len = *tlenp;
if (p == NULL) {
LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
} else {
mq = tp->t_segq;
tp->t_segq = m;
if (mq && th->th_seq + *tlenp == M_TCPHDR(mq)->th_seq) {
m->m_nextpkt = mq->m_nextpkt;
mq->m_nextpkt = NULL;
m_catpkt(m, mq);
} else
m->m_nextpkt = mq;
m->m_pkthdr.pkt_tcphdr = th;
KASSERT(te != &tqs, ("%s: temporary stack based entry not "
"first element in queue", __func__));
LIST_INSERT_AFTER(p, te, tqe_q);
}
tp->t_segqlen += *tlenp;
present:
/*
@@ -246,30 +309,25 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
*/
if (!TCPS_HAVEESTABLISHED(tp->t_state))
return (0);
flags = 0;
wakeup = 0;
q = LIST_FIRST(&tp->t_segq);
if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
return (0);
SOCKBUF_LOCK(&so->so_rcv);
while ((mq = tp->t_segq) != NULL &&
M_TCPHDR(mq)->th_seq == tp->rcv_nxt) {
tp->t_segq = mq->m_nextpkt;
tp->rcv_nxt += mq->m_pkthdr.len;
tp->t_segqlen -= mq->m_pkthdr.len;
flags = M_TCPHDR(mq)->th_flags & TH_FIN;
do {
tp->rcv_nxt += q->tqe_len;
flags = q->tqe_th->th_flags & TH_FIN;
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
m_freem(mq);
else {
mq->m_nextpkt = NULL;
sbappendstream_locked(&so->so_rcv, mq, 0);
wakeup = 1;
}
}
m_freem(q->tqe_m);
else
sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
if (q != &tqs)
uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
if (wakeup)
sorwakeup_locked(so);
else
SOCKBUF_UNLOCK(&so->so_rcv);
sorwakeup_locked(so);
return (flags);
}
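
As shown in the hunk above, the restored allocator caps the reassembly zone
globally (nmbclusters / 16 by default, overridable through the
net.inet.tcp.reass.maxsegments loader tunable) and re-exports the old
cursegments counter. As a usage sketch only, and assuming a FreeBSD userland
where these OIDs exist, the limit and the current usage can be read with
sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int maxseg = -1, curseg = -1;
	size_t len;

	/* Global cap on queued reassembly segments (CTLFLAG_RDTUN). */
	len = sizeof(maxseg);
	(void)sysctlbyname("net.inet.tcp.reass.maxsegments", &maxseg, &len,
	    NULL, 0);

	/* Segments currently sitting in reassembly queues. */
	len = sizeof(curseg);
	(void)sysctlbyname("net.inet.tcp.reass.cursegments", &curseg, &len,
	    NULL, 0);

	printf("reassembly segments: %d in use, limit %d\n", curseg, maxseg);
	return (0);
}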

View File

@@ -385,6 +385,8 @@ tcp_init(void)
if (!IS_DEFAULT_VNET(curvnet))
return;
tcp_reass_global_init();
/* XXX virtualize those bellow? */
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;

View File

@@ -1977,7 +1977,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
db_print_indent(indent);
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
tp->t_segq, tp->t_segqlen, tp->t_dupacks);
LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",

View File

@@ -46,6 +46,15 @@ VNET_DECLARE(int, tcp_do_rfc1323);
#endif /* _KERNEL */
/* TCP segment queue entry */
struct tseg_qent {
LIST_ENTRY(tseg_qent) tqe_q;
int tqe_len; /* TCP segment data length */
struct tcphdr *tqe_th; /* a pointer to tcp header */
struct mbuf *tqe_m; /* mbuf contains packet */
};
LIST_HEAD(tsegqe_head, tseg_qent);
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
tcp_seq end; /* end seq no. */
@@ -91,7 +100,7 @@ do { \
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
struct mbuf *t_segq; /* segment reassembly queue */
struct tsegqe_head t_segq; /* segment reassembly queue */
void *t_pspare[2]; /* new reassembly queue */
int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
@@ -667,6 +676,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
int tcp_input(struct mbuf **, int *, int);
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
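
The header change above swaps tcpcb's t_segq from a bare mbuf pointer back to
a queue(3) LIST of struct tseg_qent, which is what lets tcp_reass() use
LIST_EMPTY, LIST_FOREACH, LIST_INSERT_AFTER and LIST_REMOVE in the earlier
hunks. The following stand-alone sketch shows the same LIST pattern with
illustrative names (struct entry, struct head); only the LIST_* macros mirror
the real declarations.

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	LIST_ENTRY(entry) link;		/* plays the role of tqe_q */
	int seq;			/* stand-in for the segment data */
};
LIST_HEAD(head, entry);			/* plays the role of tsegqe_head */

int
main(void)
{
	struct head q = LIST_HEAD_INITIALIZER(q);
	struct entry *e;
	int i;

	/* Queue three entries, newest at the head. */
	for (i = 0; i < 3; i++) {
		if ((e = malloc(sizeof(*e))) == NULL)
			break;
		e->seq = i;
		LIST_INSERT_HEAD(&q, e, link);
	}

	/* Drain the list the same way tcp_reass_flush() walks t_segq. */
	while ((e = LIST_FIRST(&q)) != NULL) {
		LIST_REMOVE(e, link);
		printf("freeing entry %d\n", e->seq);
		free(e);
	}
	printf("empty: %d\n", LIST_EMPTY(&q));
	return (0);
}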

View File

@@ -150,7 +150,6 @@ struct pkthdr {
#define tso_segsz PH_per.sixteen[1]
#define csum_phsum PH_per.sixteen[2]
#define csum_data PH_per.thirtytwo[1]
#define pkt_tcphdr PH_loc.ptr
/*
* Description of external storage mapped into mbuf; valid only if M_EXT is