Revert r265338, r271089 and r271123 as those changes do not handle
non-inline urgent data and introduce an mbuf exhaustion attack vector
similar to FreeBSD-SA-15:15.tcp, but not requiring VNETs.

Address the issue described in FreeBSD-SA-15:15.tcp.

Reviewed by:	glebius
Approved by:	so
Approved by:	jmallett (mentor)
Security:	FreeBSD-SA-15:15.tcp
Sponsored by:	Norse Corp, Inc.
parent a80ac30b2d
commit 4741bfcb57
@@ -1665,7 +1665,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd &&
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
-	    tp->t_segq == NULL && ((to.to_flags & TOF_TS) == 0 ||
+	    LIST_EMPTY(&tp->t_segq) &&
+	    ((to.to_flags & TOF_TS) == 0 ||
 	    TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {

 		/*
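An aside for readers following the mechanics: t_segq becomes a <sys/queue.h> LIST head again, so the header-prediction fast path's emptiness test changes from a NULL pointer check to LIST_EMPTY(). A minimal userland sketch of the list primitives this diff relies on (the entry type and field names are illustrative, not the kernel's):

    #include <sys/queue.h>
    #include <stdio.h>

    struct entry {
        LIST_ENTRY(entry) link;
        int seq;
    };
    LIST_HEAD(entry_head, entry);

    int
    main(void)
    {
        struct entry_head head = LIST_HEAD_INITIALIZER(head);
        struct entry a = { .seq = 1 }, b = { .seq = 2 };

        printf("empty: %d\n", LIST_EMPTY(&head));           /* 1 */
        LIST_INSERT_HEAD(&head, &a, link);
        LIST_INSERT_AFTER(&a, &b, link);                    /* keeps order */
        printf("empty: %d, first seq: %d\n",
            LIST_EMPTY(&head), LIST_FIRST(&head)->seq);     /* 0, 1 */
        LIST_REMOVE(&a, link);
        printf("new head seq: %d\n", LIST_FIRST(&head)->seq); /* 2 */
        return (0);
    }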
@@ -2903,7 +2904,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
-		if (th->th_seq == tp->rcv_nxt && tp->t_segq == NULL &&
+		if (th->th_seq == tp->rcv_nxt &&
+		    LIST_EMPTY(&tp->t_segq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if (DELAY_ACK(tp, tlen))
 				tp->t_flags |= TF_DELACK;
@@ -71,33 +71,80 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */

+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+    "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+    &tcp_reass_maxseg, 0,
+    "Global maximum number of TCP Segments in Reassembly Queue");
+
+static uma_zone_t tcp_reass_zone;
+SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_VNET,
+    &tcp_reass_zone,
+    "Global number of TCP Segments currently in Reassembly Queue");
+
+/* Initialize TCP reassembly queue */
+static void
+tcp_reass_zone_change(void *tag)
+{
+
+	/* Set the zone limit and read back the effective value. */
+	tcp_reass_maxseg = nmbclusters / 16;
+	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+	    tcp_reass_maxseg);
+}
+
 void
+tcp_reass_global_init(void)
+{
+
+	tcp_reass_maxseg = nmbclusters / 16;
+	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+	    &tcp_reass_maxseg);
+	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	/* Set the zone limit and read back the effective value. */
+	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+	    tcp_reass_maxseg);
+	EVENTHANDLER_REGISTER(nmbclusters_change,
+	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+void
 tcp_reass_flush(struct tcpcb *tp)
 {
-	struct mbuf *m;
+	struct tseg_qent *qe;

 	INP_WLOCK_ASSERT(tp->t_inpcb);

-	while ((m = tp->t_segq) != NULL) {
-		tp->t_segq = m->m_nextpkt;
-		tp->t_segqlen -= m->m_pkthdr.len;
-		m_freem(m);
+	while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
+		LIST_REMOVE(qe, tqe_q);
+		m_freem(qe->tqe_m);
+		uma_zfree(tcp_reass_zone, qe);
+		tp->t_segqlen--;
 	}
-
 	KASSERT((tp->t_segqlen == 0),
-	    ("TCP reass queue %p length is %d instead of 0 after flush.",
+	    ("TCP reass queue %p segment count is %d instead of 0 after flush.",
 	     tp, tp->t_segqlen));
 }

-#define	M_TCPHDR(m)	((struct tcphdr *)((m)->m_pkthdr.pkt_tcphdr))
-
 int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {
+	struct tseg_qent *q;
+	struct tseg_qent *p = NULL;
+	struct tseg_qent *nq;
+	struct tseg_qent *te = NULL;
 	struct socket *so = tp->t_inpcb->inp_socket;
-	struct mbuf *mq, *mp;
-	int flags, wakeup;
+	char *s = NULL;
+	int flags;
+	struct tseg_qent tqs;

 	INP_WLOCK_ASSERT(tp->t_inpcb);

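The restored initialization follows a derive-default, apply-tunable, clamp-and-read-back pattern: the cap defaults to nmbclusters/16, can be overridden via the net.inet.tcp.reass.maxsegments loader tunable, and uma_zone_set_max() returns the limit actually in effect (UMA may adjust the requested value), which is what the sysctl then reports. A hedged userland analogue of that pattern (set_zone_max() and MAX_SUPPORTED are illustrative stand-ins, not kernel interfaces):

    #include <stdio.h>

    #define MAX_SUPPORTED 8192          /* what our toy "zone" can honor */

    /* Like uma_zone_set_max(): apply a cap, return the value in effect. */
    static int
    set_zone_max(int requested)
    {
        return (requested < MAX_SUPPORTED ? requested : MAX_SUPPORTED);
    }

    int
    main(void)
    {
        int nmbclusters = 262144;       /* example mbuf cluster pool size */
        int maxseg;

        maxseg = nmbclusters / 16;      /* default: 1/16th of the pool */
        /* A loader tunable could override maxseg here. */
        maxseg = set_zone_max(maxseg);  /* read back the effective limit */
        printf("effective reassembly segment cap: %d\n", maxseg);
        return (0);
    }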
@@ -113,10 +160,6 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 	if (th == NULL)
 		goto present;

-	M_ASSERTPKTHDR(m);
-	KASSERT(*tlenp == m->m_pkthdr.len, ("%s: tlenp %u len %u", __func__,
-	    *tlenp, m->m_pkthdr.len));
-
 	/*
 	 * Limit the number of segments that can be queued to reduce the
 	 * potential for mbuf exhaustion. For best performance, we want to be
@@ -127,15 +170,17 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 	 * Always let the missing segment through which caused this queue.
 	 * NB: Access to the socket buffer is left intentionally unlocked as we
 	 * can tolerate stale information here.
+	 *
+	 * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
+	 * should work but causes packets to be dropped when they shouldn't.
+	 * Investigate why and re-evaluate the below limit after the behaviour
+	 * is understood.
 	 */
 	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
-	    tp->t_segqlen + m->m_pkthdr.len >= sbspace(&so->so_rcv)) {
-		char *s;
-
+	    tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
 		TCPSTAT_INC(tcps_rcvreassfull);
 		*tlenp = 0;
-		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
-		    NULL))) {
+		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: queue limit reached, "
 			    "segment dropped\n", s, __func__);
 			free(s, M_TCPLOG);
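This hunk also restores the per-connection cap of sb_hiwat / t_maxseg + 1 queue entries in place of the r265338 byte-based sbspace() test; the +1 reserves room for the missing segment that the queue exists to wait for. A worked example with illustrative values:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned sb_hiwat = 65536;  /* receive buffer high-water mark */
        unsigned t_maxseg = 1460;   /* maximum segment size */

        /* +1 guarantees room for the segment that created the queue. */
        printf("max queued segments: %u\n", sb_hiwat / t_maxseg + 1); /* 45 */
        return (0);
    }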
@@ -144,14 +189,47 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 		return (0);
 	}

+	/*
+	 * Allocate a new queue entry. If we can't, or hit the zone limit
+	 * just drop the pkt.
+	 *
+	 * Use a temporary structure on the stack for the missing segment
+	 * when the zone is exhausted. Otherwise we may get stuck.
+	 */
+	te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
+	if (te == NULL) {
+		if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
+			TCPSTAT_INC(tcps_rcvmemdrop);
+			m_freem(m);
+			*tlenp = 0;
+			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
+			    NULL))) {
+				log(LOG_DEBUG, "%s; %s: global zone limit "
+				    "reached, segment dropped\n", s, __func__);
+				free(s, M_TCPLOG);
+			}
+			return (0);
+		} else {
+			bzero(&tqs, sizeof(struct tseg_qent));
+			te = &tqs;
+			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
+			    NULL))) {
+				log(LOG_DEBUG,
+				    "%s; %s: global zone limit reached, using "
+				    "stack for missing segment\n", s, __func__);
+				free(s, M_TCPLOG);
+			}
+		}
+	}
+	tp->t_segqlen++;
+
 	/*
 	 * Find a segment which begins after this one does.
 	 */
-	mp = NULL;
-	for (mq = tp->t_segq; mq != NULL; mq = mq->m_nextpkt) {
-		if (SEQ_GT(M_TCPHDR(mq)->th_seq, th->th_seq))
+	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
+		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
 			break;
-		mp = mq;
+		p = q;
 	}

 	/*
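The stack-based tqs entry is the subtle part of this hunk: when the zone is exhausted, an arbitrary out-of-order segment is simply dropped, but the segment that fills the hole at rcv_nxt must still be queued or the connection could stall. It therefore borrows an automatic variable, which is safe only because the present: code below consumes the head entry before tcp_reass() returns. A sketch of the pattern in isolation (alloc_entry() and consume_entry() are illustrative stand-ins):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct qent { int value; };

    static struct qent *
    alloc_entry(void)
    {
        return (NULL);                  /* simulate zone exhaustion */
    }

    static void
    consume_entry(struct qent *e)
    {
        printf("consumed %d\n", e->value);
    }

    int
    main(void)
    {
        struct qent tqs, *te;
        bool critical = true;           /* the segment at rcv_nxt */

        te = alloc_entry();
        if (te == NULL) {
            if (!critical)
                return (0);             /* non-critical: just drop */
            memset(&tqs, 0, sizeof(tqs));
            te = &tqs;                  /* must be consumed before return */
        }
        te->value = 42;
        consume_entry(te);
        if (te != &tqs)
            free(te);                   /* only free heap-backed entries */
        return (0);
    }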
@@ -159,16 +237,18 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 	 * our data already. If so, drop the data from the incoming
 	 * segment. If it provides all of our data, drop us.
 	 */
-	if (mp != NULL) {
+	if (p != NULL) {
 		int i;
+
 		/* conversion to int (in i) handles seq wraparound */
-		i = M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len - th->th_seq;
+		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
 		if (i > 0) {
 			if (i >= *tlenp) {
 				TCPSTAT_INC(tcps_rcvduppack);
 				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
 				m_freem(m);
+				if (te != &tqs)
+					uma_zfree(tcp_reass_zone, te);
+				tp->t_segqlen--;
 				/*
 				 * Try to present any queued data
 				 * at the left window edge to the user.
|
||||
* While we overlap succeeding segments trim them or,
|
||||
* if they are completely covered, dequeue them.
|
||||
*/
|
||||
while (mq) {
|
||||
struct mbuf *nq;
|
||||
int i;
|
||||
|
||||
i = (th->th_seq + *tlenp) - M_TCPHDR(mq)->th_seq;
|
||||
while (q) {
|
||||
int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
|
||||
if (i <= 0)
|
||||
break;
|
||||
if (i < mq->m_pkthdr.len) {
|
||||
M_TCPHDR(mq)->th_seq += i;
|
||||
m_adj(mq, i);
|
||||
tp->t_segqlen -= i;
|
||||
if (i < q->tqe_len) {
|
||||
q->tqe_th->th_seq += i;
|
||||
q->tqe_len -= i;
|
||||
m_adj(q->tqe_m, i);
|
||||
break;
|
||||
}
|
||||
|
||||
nq = mq->m_nextpkt;
|
||||
tp->t_segqlen -= mq->m_pkthdr.len;
|
||||
m_freem(mq);
|
||||
if (mp)
|
||||
mp->m_nextpkt = nq;
|
||||
else
|
||||
tp->t_segq = nq;
|
||||
mq = nq;
|
||||
nq = LIST_NEXT(q, tqe_q);
|
||||
LIST_REMOVE(q, tqe_q);
|
||||
m_freem(q->tqe_m);
|
||||
uma_zfree(tcp_reass_zone, q);
|
||||
tp->t_segqlen--;
|
||||
q = nq;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the new segment queue entry into place. Try to collapse
|
||||
* mbuf chains if segments are adjacent.
|
||||
*/
|
||||
if (mp) {
|
||||
if (M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len == th->th_seq)
|
||||
m_catpkt(mp, m);
|
||||
else {
|
||||
m->m_nextpkt = mp->m_nextpkt;
|
||||
mp->m_nextpkt = m;
|
||||
m->m_pkthdr.pkt_tcphdr = th;
|
||||
}
|
||||
/* Insert the new segment queue entry into place. */
|
||||
te->tqe_m = m;
|
||||
te->tqe_th = th;
|
||||
te->tqe_len = *tlenp;
|
||||
|
||||
if (p == NULL) {
|
||||
LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
|
||||
} else {
|
||||
mq = tp->t_segq;
|
||||
tp->t_segq = m;
|
||||
if (mq && th->th_seq + *tlenp == M_TCPHDR(mq)->th_seq) {
|
||||
m->m_nextpkt = mq->m_nextpkt;
|
||||
mq->m_nextpkt = NULL;
|
||||
m_catpkt(m, mq);
|
||||
} else
|
||||
m->m_nextpkt = mq;
|
||||
m->m_pkthdr.pkt_tcphdr = th;
|
||||
KASSERT(te != &tqs, ("%s: temporary stack based entry not "
|
||||
"first element in queue", __func__));
|
||||
LIST_INSERT_AFTER(p, te, tqe_q);
|
||||
}
|
||||
tp->t_segqlen += *tlenp;
|
||||
|
||||
present:
|
||||
/*
|
||||
@@ -246,30 +309,25 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 	 */
 	if (!TCPS_HAVEESTABLISHED(tp->t_state))
 		return (0);
-
-	flags = 0;
-	wakeup = 0;
+	q = LIST_FIRST(&tp->t_segq);
+	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+		return (0);
 	SOCKBUF_LOCK(&so->so_rcv);
-	while ((mq = tp->t_segq) != NULL &&
-	    M_TCPHDR(mq)->th_seq == tp->rcv_nxt) {
-		tp->t_segq = mq->m_nextpkt;
-
-		tp->rcv_nxt += mq->m_pkthdr.len;
-		tp->t_segqlen -= mq->m_pkthdr.len;
-		flags = M_TCPHDR(mq)->th_flags & TH_FIN;
-
+	do {
+		tp->rcv_nxt += q->tqe_len;
+		flags = q->tqe_th->th_flags & TH_FIN;
+		nq = LIST_NEXT(q, tqe_q);
+		LIST_REMOVE(q, tqe_q);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
-			m_freem(mq);
-		else {
-			mq->m_nextpkt = NULL;
-			sbappendstream_locked(&so->so_rcv, mq, 0);
-			wakeup = 1;
-		}
-	}
+			m_freem(q->tqe_m);
+		else
+			sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
+		if (q != &tqs)
+			uma_zfree(tcp_reass_zone, q);
+		tp->t_segqlen--;
+		q = nq;
+	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
 	ND6_HINT(tp);
-	if (wakeup)
-		sorwakeup_locked(so);
-	else
-		SOCKBUF_UNLOCK(&so->so_rcv);
+	sorwakeup_locked(so);
 	return (flags);
 }
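The rewritten present: block only takes the socket-buffer lock once it knows the first queued entry is in sequence, then hands contiguous entries to sbappendstream_locked() until it hits a gap. A userland model of the delivery loop (the types and queue contents are illustrative):

    #include <sys/queue.h>
    #include <stdio.h>

    struct seg {
        LIST_ENTRY(seg) link;
        unsigned seq, len;
    };
    LIST_HEAD(seg_head, seg);

    int
    main(void)
    {
        struct seg s1 = { .seq = 100, .len = 50 };
        struct seg s2 = { .seq = 150, .len = 10 };
        struct seg s3 = { .seq = 200, .len = 5 };   /* gap at 160 */
        struct seg_head head = LIST_HEAD_INITIALIZER(head);
        struct seg *q, *nq;
        unsigned rcv_nxt = 100;

        LIST_INSERT_HEAD(&head, &s1, link);
        LIST_INSERT_AFTER(&s1, &s2, link);
        LIST_INSERT_AFTER(&s2, &s3, link);

        q = LIST_FIRST(&head);
        if (q == NULL || q->seq != rcv_nxt)
            return (0);                 /* head not in sequence: nothing to do */
        do {
            rcv_nxt += q->len;          /* advance the left window edge */
            nq = LIST_NEXT(q, link);
            LIST_REMOVE(q, link);
            printf("delivered seq %u len %u\n", q->seq, q->len);
            q = nq;
        } while (q != NULL && q->seq == rcv_nxt);
        printf("rcv_nxt now %u, segment still queued: %s\n", rcv_nxt,
            LIST_EMPTY(&head) ? "no" : "yes");      /* 160, yes */
        return (0);
    }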
@@ -385,6 +385,8 @@ tcp_init(void)
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;

+	tcp_reass_global_init();
+
 	/* XXX virtualize those bellow? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
@@ -1977,7 +1977,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)

 	db_print_indent(indent);
 	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
-	   tp->t_segq, tp->t_segqlen, tp->t_dupacks);
+	   LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);

 	db_print_indent(indent);
 	db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
@@ -46,6 +46,15 @@ VNET_DECLARE(int, tcp_do_rfc1323);

 #endif /* _KERNEL */

+/* TCP segment queue entry */
+struct tseg_qent {
+	LIST_ENTRY(tseg_qent) tqe_q;
+	int	tqe_len;		/* TCP segment data length */
+	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
+	struct	mbuf	*tqe_m;		/* mbuf contains packet */
+};
+LIST_HEAD(tsegqe_head, tseg_qent);
+
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
 	tcp_seq end;		/* end seq no. */
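With this structure back, each queued segment again costs a fixed-size tseg_qent (two list pointers, a length, and two pointers, 40 bytes on LP64) on top of its mbuf chain, and that per-entry cost is what the zone cap bounds. A userland check of the layout (the forward declarations stand in for the kernel types):

    #include <sys/queue.h>
    #include <stdio.h>

    struct tcphdr;                      /* opaque stand-ins */
    struct mbuf;

    struct tseg_qent {
        LIST_ENTRY(tseg_qent) tqe_q;    /* two pointers */
        int tqe_len;
        struct tcphdr *tqe_th;
        struct mbuf *tqe_m;
    };

    int
    main(void)
    {
        /* 16 + 4 (+4 padding) + 8 + 8 = 40 bytes on a typical LP64 ABI. */
        printf("sizeof(struct tseg_qent) = %zu\n", sizeof(struct tseg_qent));
        return (0);
    }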
@@ -91,7 +100,7 @@ do { \
 * Organized for 16 byte cacheline efficiency.
 */
 struct tcpcb {
-	struct mbuf *t_segq;	/* segment reassembly queue */
+	struct	tsegqe_head t_segq;	/* segment reassembly queue */
+	void	*t_pspare[2];		/* new reassembly queue */
 	int	t_segqlen;	/* segment reassembly queue length */
 	int	t_dupacks;	/* consecutive dup acks recd */
@@ -667,6 +676,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
 char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
+void	 tcp_reass_global_init(void);
 void	 tcp_reass_flush(struct tcpcb *);
 int	 tcp_input(struct mbuf **, int *, int);
 u_long	 tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
@@ -150,7 +150,6 @@ struct pkthdr {
 #define	tso_segsz	PH_per.sixteen[1]
 #define	csum_phsum	PH_per.sixteen[2]
 #define	csum_data	PH_per.thirtytwo[1]
-#define	pkt_tcphdr	PH_loc.ptr

 /*
 * Description of external storage mapped into mbuf; valid only if M_EXT is
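pkt_tcphdr was the alias r265338 added to stash a pointer to the TCP header in the mbuf packet header's per-location scratch union (PH_loc); with tseg_qent restored, tqe_th carries that pointer and the alias is removed. A toy illustration of the aliasing idea (this struct only mimics the layout, it is not <sys/mbuf.h>):

    #include <stdio.h>

    struct pkthdr_loc {
        union {
            void *ptr;
            unsigned long long aux;
        } PH_loc;                       /* scratch space owned by the current layer */
    };
    #define pkt_tcphdr PH_loc.ptr       /* the alias the revert deletes */

    int
    main(void)
    {
        struct pkthdr_loc ph;
        int fake_tcphdr = 0;

        ph.pkt_tcphdr = &fake_tcphdr;   /* reuse scratch space for a pointer */
        printf("stashed header pointer: %p\n", ph.pkt_tcphdr);
        return (0);
    }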