Add a TCP TIMEWAIT state which uses less space than a full-blown TCP
control block. Allow the socket and tcpcb structures to be freed earlier than the inpcb. Update code to understand an inp without a socket. Reviewed by: hsu, silby, jayanth Sponsored by: DARPA, NAI Labs
This commit is contained in:
parent
7990938421
commit
340c35de6a
@ -62,6 +62,7 @@
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/in_var.h>
|
||||
#include <netinet/ip_var.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
#ifdef INET6
|
||||
#include <netinet/ip6.h>
|
||||
#include <netinet6/ip6_var.h>
|
||||
@ -169,8 +170,11 @@ in_pcballoc(so, pcbinfo, td)
|
||||
}
|
||||
#endif /*IPSEC*/
|
||||
#if defined(INET6)
|
||||
if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only)
|
||||
inp->inp_flags |= IN6P_IPV6_V6ONLY;
|
||||
if (INP_SOCKAF(so) == AF_INET6) {
|
||||
inp->inp_vflag |= INP_IPV6PROTO;
|
||||
if (ip6_v6only)
|
||||
inp->inp_flags |= IN6P_IPV6_V6ONLY;
|
||||
}
|
||||
#endif
|
||||
LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
|
||||
pcbinfo->ipi_count++;
|
||||
@ -294,6 +298,17 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td)
|
||||
t = in_pcblookup_local(inp->inp_pcbinfo,
|
||||
sin->sin_addr, lport,
|
||||
prison ? 0 : INPLOOKUP_WILDCARD);
|
||||
/*
|
||||
* XXX
|
||||
* This entire block sorely needs a rewrite.
|
||||
*/
|
||||
if (t && (t->inp_vflag & INP_TIMEWAIT)) {
|
||||
if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
|
||||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
|
||||
(intotw(t)->tw_so_options & SO_REUSEPORT) == 0) &&
|
||||
(so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid))
|
||||
return (EADDRINUSE);
|
||||
} else
|
||||
if (t &&
|
||||
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
|
||||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
|
||||
@ -317,6 +332,10 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td)
|
||||
return (EADDRNOTAVAIL);
|
||||
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
|
||||
lport, prison ? 0 : wild);
|
||||
if (t && (t->inp_vflag & INP_TIMEWAIT)) {
|
||||
if ((reuseport & intotw(t)->tw_so_options) == 0)
|
||||
return (EADDRINUSE);
|
||||
} else
|
||||
if (t &&
|
||||
(reuseport & t->inp_socket->so_options) == 0) {
|
||||
#if defined(INET6)
|
||||
@ -640,8 +659,10 @@ in_pcbdetach(inp)
|
||||
#endif /*IPSEC*/
|
||||
inp->inp_gencnt = ++ipi->ipi_gencnt;
|
||||
in_pcbremlists(inp);
|
||||
so->so_pcb = 0;
|
||||
sotryfree(so);
|
||||
if (so) {
|
||||
so->so_pcb = 0;
|
||||
sotryfree(so);
|
||||
}
|
||||
if (inp->inp_options)
|
||||
(void)m_free(inp->inp_options);
|
||||
if (inp->inp_route.ro_rt)
|
||||
|
@ -140,6 +140,8 @@ struct inpcb {
|
||||
u_char inp_vflag; /* IP version flag (v4/v6) */
|
||||
#define INP_IPV4 0x1
|
||||
#define INP_IPV6 0x2
|
||||
#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
|
||||
#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
|
||||
u_char inp_ip_ttl; /* time to live proto */
|
||||
u_char inp_ip_p; /* protocol proto */
|
||||
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "opt_inet6.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/domain.h>
|
||||
|
@ -470,10 +470,10 @@ ip_output(m0, opt, ro, flags, imo, inp)
|
||||
sendit:
|
||||
#ifdef IPSEC
|
||||
/* get SP for this packet */
|
||||
if (so == NULL)
|
||||
if (inp == NULL)
|
||||
sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
|
||||
else
|
||||
sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
|
||||
sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
|
||||
|
||||
if (sp == NULL) {
|
||||
ipsecstat.out_inval++;
|
||||
|
@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
|
||||
struct mbuf *);
|
||||
static void tcp_xmit_timer(struct tcpcb *, int);
|
||||
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
|
||||
static int tcp_timewait(struct tcptw *, struct tcpopt *,
|
||||
struct tcphdr *, struct mbuf *, int);
|
||||
|
||||
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
|
||||
#ifdef INET6
|
||||
@ -636,6 +638,22 @@ findpcb:
|
||||
goto dropwithreset;
|
||||
}
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_vflag & INP_TIMEWAIT) {
|
||||
/*
|
||||
* The only option of relevance is TOF_CC, and only if
|
||||
* present in a SYN segment. See tcp_timewait().
|
||||
*/
|
||||
if (thflags & TH_SYN)
|
||||
tcp_dooptions(&to, optp, optlen, 1);
|
||||
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
|
||||
&to, th, m, tlen))
|
||||
goto findpcb;
|
||||
/*
|
||||
* tcp_timewait unlocks inp.
|
||||
*/
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
return;
|
||||
}
|
||||
tp = intotcpcb(inp);
|
||||
if (tp == 0) {
|
||||
INP_UNLOCK(inp);
|
||||
@ -1319,6 +1337,7 @@ trimthenstep6:
|
||||
case TCPS_LAST_ACK:
|
||||
case TCPS_CLOSING:
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if ((thflags & TH_SYN) &&
|
||||
(to.to_flags & TOF_CC) && tp->cc_recv != 0) {
|
||||
if (tp->t_state == TCPS_TIME_WAIT &&
|
||||
@ -1418,6 +1437,8 @@ trimthenstep6:
|
||||
break;
|
||||
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT,
|
||||
("timewait"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1550,6 +1571,7 @@ trimthenstep6:
|
||||
* and start over if the sequence numbers
|
||||
* are above the previous ones.
|
||||
*/
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if (thflags & TH_SYN &&
|
||||
tp->t_state == TCPS_TIME_WAIT &&
|
||||
SEQ_GT(th->th_seq, tp->rcv_nxt)) {
|
||||
@ -1678,7 +1700,7 @@ trimthenstep6:
|
||||
case TCPS_CLOSING:
|
||||
case TCPS_LAST_ACK:
|
||||
case TCPS_TIME_WAIT:
|
||||
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
|
||||
if (tlen == 0 && tiwin == tp->snd_wnd) {
|
||||
tcpstat.tcps_rcvdupack++;
|
||||
@ -1921,6 +1943,10 @@ process_ACK:
|
||||
* specification, but if we don't get a FIN
|
||||
* we'll hang forever.
|
||||
*/
|
||||
/* XXXjl
|
||||
* we should release the tp also, and use a
|
||||
* compressed state.
|
||||
*/
|
||||
if (so->so_state & SS_CANTRCVMORE) {
|
||||
soisdisconnected(so);
|
||||
callout_reset(tp->tt_2msl, tcp_maxidle,
|
||||
@ -1938,19 +1964,11 @@ process_ACK:
|
||||
*/
|
||||
case TCPS_CLOSING:
|
||||
if (ourfinisacked) {
|
||||
tp->t_state = TCPS_TIME_WAIT;
|
||||
tcp_canceltimers(tp);
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 &&
|
||||
(ticks - tp->t_starttime) < tcp_msl)
|
||||
callout_reset(tp->tt_2msl,
|
||||
tp->t_rxtcur *
|
||||
TCPTV_TWTRUNC,
|
||||
tcp_timer_2msl, tp);
|
||||
else
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
soisdisconnected(so);
|
||||
KASSERT(headlocked, ("headlocked"));
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
m_freem(m);
|
||||
tcp_twstart(tp);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1973,6 +1991,7 @@ process_ACK:
|
||||
* it and restart the finack timer.
|
||||
*/
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
goto dropafterack;
|
||||
@ -2166,27 +2185,15 @@ dodata: /* XXX */
|
||||
* standard timers.
|
||||
*/
|
||||
case TCPS_FIN_WAIT_2:
|
||||
tp->t_state = TCPS_TIME_WAIT;
|
||||
tcp_canceltimers(tp);
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 &&
|
||||
(ticks - tp->t_starttime) < tcp_msl) {
|
||||
callout_reset(tp->tt_2msl,
|
||||
tp->t_rxtcur * TCPTV_TWTRUNC,
|
||||
tcp_timer_2msl, tp);
|
||||
/* For transaction client, force ACK now. */
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
}
|
||||
else
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
soisdisconnected(so);
|
||||
break;
|
||||
KASSERT(headlocked == 0, ("headlocked"));
|
||||
tcp_twstart(tp);
|
||||
return;
|
||||
|
||||
/*
|
||||
* In TIME_WAIT state restart the 2 MSL time_wait timer.
|
||||
*/
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
break;
|
||||
@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th)
|
||||
*/
|
||||
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 1 if the TIME_WAIT state was killed and we should start over,
|
||||
* looking for a pcb in the listen state. Returns 0 otherwise.
|
||||
*/
|
||||
static int
|
||||
tcp_timewait(tw, to, th, m, tlen)
|
||||
struct tcptw *tw;
|
||||
struct tcpopt *to;
|
||||
struct tcphdr *th;
|
||||
struct mbuf *m;
|
||||
int tlen;
|
||||
{
|
||||
int thflags;
|
||||
tcp_seq seq;
|
||||
#ifdef INET6
|
||||
int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
|
||||
#else
|
||||
const int isipv6 = 0;
|
||||
#endif
|
||||
|
||||
thflags = th->th_flags;
|
||||
|
||||
/*
|
||||
* NOTE: for FIN_WAIT_2 (to be added later),
|
||||
* must validate sequence number before accepting RST
|
||||
*/
|
||||
|
||||
/*
|
||||
* If the segment contains RST:
|
||||
* Drop the segment - see Stevens, vol. 2, p. 964 and
|
||||
* RFC 1337.
|
||||
*/
|
||||
if (thflags & TH_RST)
|
||||
goto drop;
|
||||
|
||||
/*
|
||||
* If segment contains a SYN and CC [not CC.NEW] option:
|
||||
* if connection duration > MSL, drop packet and send RST;
|
||||
*
|
||||
* if SEG.CC > CCrecv then is new SYN.
|
||||
* Complete close and delete TCPCB. Then reprocess
|
||||
* segment, hoping to find new TCPCB in LISTEN state;
|
||||
*
|
||||
* else must be old SYN; drop it.
|
||||
* else do normal processing.
|
||||
*/
|
||||
if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) {
|
||||
if ((ticks - tw->t_starttime) > tcp_msl)
|
||||
goto reset;
|
||||
if (CC_GT(to->to_cc, tw->cc_recv)) {
|
||||
tcp_twclose(tw);
|
||||
return (1);
|
||||
}
|
||||
goto drop;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* PAWS not needed at the moment */
|
||||
/*
|
||||
* RFC 1323 PAWS: If we have a timestamp reply on this segment
|
||||
* and it's less than ts_recent, drop it.
|
||||
*/
|
||||
if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
|
||||
TSTMP_LT(to.to_tsval, tp->ts_recent)) {
|
||||
if ((thflags & TH_ACK) == 0)
|
||||
goto drop;
|
||||
goto ack;
|
||||
}
|
||||
/*
|
||||
* ts_recent is never updated because we never accept new segments.
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If a new connection request is received
|
||||
* while in TIME_WAIT, drop the old connection
|
||||
* and start over if the sequence numbers
|
||||
* are above the previous ones.
|
||||
*/
|
||||
if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
|
||||
tcp_twclose(tw);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the the segment if it does not contain an ACK.
|
||||
*/
|
||||
if ((thflags & TH_ACK) == 0)
|
||||
goto drop;
|
||||
|
||||
/*
|
||||
* Reset the 2MSL timer if this is a duplicate FIN.
|
||||
*/
|
||||
if (thflags & TH_FIN) {
|
||||
seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
|
||||
if (seq + 1 == tw->rcv_nxt)
|
||||
callout_reset(tw->tt_2msl,
|
||||
2 * tcp_msl, tcp_timer_2msl, tw);
|
||||
}
|
||||
|
||||
/*
|
||||
* Acknowlege the segment, then drop it.
|
||||
*/
|
||||
tcp_twrespond(tw, TH_ACK);
|
||||
goto drop;
|
||||
|
||||
reset:
|
||||
/*
|
||||
* Generate a RST, dropping incoming segment.
|
||||
* Make ACK acceptable to originator of segment.
|
||||
* Don't bother to respond if destination was broadcast/multicast.
|
||||
*/
|
||||
if (m->m_flags & (M_BCAST|M_MCAST))
|
||||
goto drop;
|
||||
if (isipv6) {
|
||||
struct ip6_hdr *ip6;
|
||||
|
||||
/* IPv6 anycast check is done at tcp6_input() */
|
||||
ip6 = mtod(m, struct ip6_hdr *);
|
||||
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
|
||||
IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
|
||||
goto drop;
|
||||
} else {
|
||||
struct ip *ip;
|
||||
|
||||
ip = mtod(m, struct ip *);
|
||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
|
||||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
|
||||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
|
||||
in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
|
||||
goto drop;
|
||||
}
|
||||
if (thflags & TH_ACK) {
|
||||
tcp_respond(NULL,
|
||||
mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
|
||||
} else {
|
||||
seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
|
||||
tcp_respond(NULL,
|
||||
mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
|
||||
}
|
||||
INP_UNLOCK(tw->tw_inpcb);
|
||||
return (0);
|
||||
|
||||
drop:
|
||||
INP_UNLOCK(tw->tw_inpcb);
|
||||
m_freem(m);
|
||||
return (0);
|
||||
}
|
||||
|
@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
|
||||
struct mbuf *);
|
||||
static void tcp_xmit_timer(struct tcpcb *, int);
|
||||
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
|
||||
static int tcp_timewait(struct tcptw *, struct tcpopt *,
|
||||
struct tcphdr *, struct mbuf *, int);
|
||||
|
||||
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
|
||||
#ifdef INET6
|
||||
@ -636,6 +638,22 @@ findpcb:
|
||||
goto dropwithreset;
|
||||
}
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_vflag & INP_TIMEWAIT) {
|
||||
/*
|
||||
* The only option of relevance is TOF_CC, and only if
|
||||
* present in a SYN segment. See tcp_timewait().
|
||||
*/
|
||||
if (thflags & TH_SYN)
|
||||
tcp_dooptions(&to, optp, optlen, 1);
|
||||
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
|
||||
&to, th, m, tlen))
|
||||
goto findpcb;
|
||||
/*
|
||||
* tcp_timewait unlocks inp.
|
||||
*/
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
return;
|
||||
}
|
||||
tp = intotcpcb(inp);
|
||||
if (tp == 0) {
|
||||
INP_UNLOCK(inp);
|
||||
@ -1319,6 +1337,7 @@ trimthenstep6:
|
||||
case TCPS_LAST_ACK:
|
||||
case TCPS_CLOSING:
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if ((thflags & TH_SYN) &&
|
||||
(to.to_flags & TOF_CC) && tp->cc_recv != 0) {
|
||||
if (tp->t_state == TCPS_TIME_WAIT &&
|
||||
@ -1418,6 +1437,8 @@ trimthenstep6:
|
||||
break;
|
||||
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT,
|
||||
("timewait"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1550,6 +1571,7 @@ trimthenstep6:
|
||||
* and start over if the sequence numbers
|
||||
* are above the previous ones.
|
||||
*/
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if (thflags & TH_SYN &&
|
||||
tp->t_state == TCPS_TIME_WAIT &&
|
||||
SEQ_GT(th->th_seq, tp->rcv_nxt)) {
|
||||
@ -1678,7 +1700,7 @@ trimthenstep6:
|
||||
case TCPS_CLOSING:
|
||||
case TCPS_LAST_ACK:
|
||||
case TCPS_TIME_WAIT:
|
||||
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
|
||||
if (tlen == 0 && tiwin == tp->snd_wnd) {
|
||||
tcpstat.tcps_rcvdupack++;
|
||||
@ -1921,6 +1943,10 @@ process_ACK:
|
||||
* specification, but if we don't get a FIN
|
||||
* we'll hang forever.
|
||||
*/
|
||||
/* XXXjl
|
||||
* we should release the tp also, and use a
|
||||
* compressed state.
|
||||
*/
|
||||
if (so->so_state & SS_CANTRCVMORE) {
|
||||
soisdisconnected(so);
|
||||
callout_reset(tp->tt_2msl, tcp_maxidle,
|
||||
@ -1938,19 +1964,11 @@ process_ACK:
|
||||
*/
|
||||
case TCPS_CLOSING:
|
||||
if (ourfinisacked) {
|
||||
tp->t_state = TCPS_TIME_WAIT;
|
||||
tcp_canceltimers(tp);
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 &&
|
||||
(ticks - tp->t_starttime) < tcp_msl)
|
||||
callout_reset(tp->tt_2msl,
|
||||
tp->t_rxtcur *
|
||||
TCPTV_TWTRUNC,
|
||||
tcp_timer_2msl, tp);
|
||||
else
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
soisdisconnected(so);
|
||||
KASSERT(headlocked, ("headlocked"));
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
m_freem(m);
|
||||
tcp_twstart(tp);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1973,6 +1991,7 @@ process_ACK:
|
||||
* it and restart the finack timer.
|
||||
*/
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
goto dropafterack;
|
||||
@ -2166,27 +2185,15 @@ dodata: /* XXX */
|
||||
* standard timers.
|
||||
*/
|
||||
case TCPS_FIN_WAIT_2:
|
||||
tp->t_state = TCPS_TIME_WAIT;
|
||||
tcp_canceltimers(tp);
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 &&
|
||||
(ticks - tp->t_starttime) < tcp_msl) {
|
||||
callout_reset(tp->tt_2msl,
|
||||
tp->t_rxtcur * TCPTV_TWTRUNC,
|
||||
tcp_timer_2msl, tp);
|
||||
/* For transaction client, force ACK now. */
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
}
|
||||
else
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
soisdisconnected(so);
|
||||
break;
|
||||
KASSERT(headlocked == 0, ("headlocked"));
|
||||
tcp_twstart(tp);
|
||||
return;
|
||||
|
||||
/*
|
||||
* In TIME_WAIT state restart the 2 MSL time_wait timer.
|
||||
*/
|
||||
case TCPS_TIME_WAIT:
|
||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
|
||||
callout_reset(tp->tt_2msl, 2 * tcp_msl,
|
||||
tcp_timer_2msl, tp);
|
||||
break;
|
||||
@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th)
|
||||
*/
|
||||
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 1 if the TIME_WAIT state was killed and we should start over,
|
||||
* looking for a pcb in the listen state. Returns 0 otherwise.
|
||||
*/
|
||||
static int
|
||||
tcp_timewait(tw, to, th, m, tlen)
|
||||
struct tcptw *tw;
|
||||
struct tcpopt *to;
|
||||
struct tcphdr *th;
|
||||
struct mbuf *m;
|
||||
int tlen;
|
||||
{
|
||||
int thflags;
|
||||
tcp_seq seq;
|
||||
#ifdef INET6
|
||||
int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
|
||||
#else
|
||||
const int isipv6 = 0;
|
||||
#endif
|
||||
|
||||
thflags = th->th_flags;
|
||||
|
||||
/*
|
||||
* NOTE: for FIN_WAIT_2 (to be added later),
|
||||
* must validate sequence number before accepting RST
|
||||
*/
|
||||
|
||||
/*
|
||||
* If the segment contains RST:
|
||||
* Drop the segment - see Stevens, vol. 2, p. 964 and
|
||||
* RFC 1337.
|
||||
*/
|
||||
if (thflags & TH_RST)
|
||||
goto drop;
|
||||
|
||||
/*
|
||||
* If segment contains a SYN and CC [not CC.NEW] option:
|
||||
* if connection duration > MSL, drop packet and send RST;
|
||||
*
|
||||
* if SEG.CC > CCrecv then is new SYN.
|
||||
* Complete close and delete TCPCB. Then reprocess
|
||||
* segment, hoping to find new TCPCB in LISTEN state;
|
||||
*
|
||||
* else must be old SYN; drop it.
|
||||
* else do normal processing.
|
||||
*/
|
||||
if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) {
|
||||
if ((ticks - tw->t_starttime) > tcp_msl)
|
||||
goto reset;
|
||||
if (CC_GT(to->to_cc, tw->cc_recv)) {
|
||||
tcp_twclose(tw);
|
||||
return (1);
|
||||
}
|
||||
goto drop;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* PAWS not needed at the moment */
|
||||
/*
|
||||
* RFC 1323 PAWS: If we have a timestamp reply on this segment
|
||||
* and it's less than ts_recent, drop it.
|
||||
*/
|
||||
if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
|
||||
TSTMP_LT(to.to_tsval, tp->ts_recent)) {
|
||||
if ((thflags & TH_ACK) == 0)
|
||||
goto drop;
|
||||
goto ack;
|
||||
}
|
||||
/*
|
||||
* ts_recent is never updated because we never accept new segments.
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If a new connection request is received
|
||||
* while in TIME_WAIT, drop the old connection
|
||||
* and start over if the sequence numbers
|
||||
* are above the previous ones.
|
||||
*/
|
||||
if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
|
||||
tcp_twclose(tw);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the the segment if it does not contain an ACK.
|
||||
*/
|
||||
if ((thflags & TH_ACK) == 0)
|
||||
goto drop;
|
||||
|
||||
/*
|
||||
* Reset the 2MSL timer if this is a duplicate FIN.
|
||||
*/
|
||||
if (thflags & TH_FIN) {
|
||||
seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
|
||||
if (seq + 1 == tw->rcv_nxt)
|
||||
callout_reset(tw->tt_2msl,
|
||||
2 * tcp_msl, tcp_timer_2msl, tw);
|
||||
}
|
||||
|
||||
/*
|
||||
* Acknowlege the segment, then drop it.
|
||||
*/
|
||||
tcp_twrespond(tw, TH_ACK);
|
||||
goto drop;
|
||||
|
||||
reset:
|
||||
/*
|
||||
* Generate a RST, dropping incoming segment.
|
||||
* Make ACK acceptable to originator of segment.
|
||||
* Don't bother to respond if destination was broadcast/multicast.
|
||||
*/
|
||||
if (m->m_flags & (M_BCAST|M_MCAST))
|
||||
goto drop;
|
||||
if (isipv6) {
|
||||
struct ip6_hdr *ip6;
|
||||
|
||||
/* IPv6 anycast check is done at tcp6_input() */
|
||||
ip6 = mtod(m, struct ip6_hdr *);
|
||||
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
|
||||
IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
|
||||
goto drop;
|
||||
} else {
|
||||
struct ip *ip;
|
||||
|
||||
ip = mtod(m, struct ip *);
|
||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
|
||||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
|
||||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
|
||||
in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
|
||||
goto drop;
|
||||
}
|
||||
if (thflags & TH_ACK) {
|
||||
tcp_respond(NULL,
|
||||
mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
|
||||
} else {
|
||||
seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
|
||||
tcp_respond(NULL,
|
||||
mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
|
||||
}
|
||||
INP_UNLOCK(tw->tw_inpcb);
|
||||
return (0);
|
||||
|
||||
drop:
|
||||
INP_UNLOCK(tw->tw_inpcb);
|
||||
m_freem(m);
|
||||
return (0);
|
||||
}
|
||||
|
@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
|
||||
|
||||
static void tcp_cleartaocache(void);
|
||||
static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
static void tcp_discardcb(struct tcpcb *);
|
||||
|
||||
/*
|
||||
* Target size of TCP PCB hash tables. Must be a power of two.
|
||||
@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the actual shape of what we allocate using the zone
|
||||
* allocator. Doing it this way allows us to protect both structures
|
||||
* using the same generation count, and also eliminates the overhead
|
||||
* of allocating tcpcbs separately. By hiding the structure here,
|
||||
* we avoid changing most of the rest of the code (although it needs
|
||||
* to be changed, eventually, for greater efficiency).
|
||||
* XXX
|
||||
* Callouts should be moved into struct tcp directly. They are currently
|
||||
* separate because the tcpcb structure is exported to userland for sysctl
|
||||
* parsing purposes, which do not know about callouts.
|
||||
*/
|
||||
#define ALIGNMENT 32
|
||||
#define ALIGNM1 (ALIGNMENT - 1)
|
||||
struct inp_tp {
|
||||
union {
|
||||
struct inpcb inp;
|
||||
char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
|
||||
} inp_tp_u;
|
||||
struct tcpcb_mem {
|
||||
struct tcpcb tcb;
|
||||
struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
|
||||
struct callout inp_tp_delack;
|
||||
struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
|
||||
struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
|
||||
};
|
||||
#undef ALIGNMENT
|
||||
#undef ALIGNM1
|
||||
struct tcptw_mem {
|
||||
struct tcptw tw;
|
||||
struct callout tcptw_mem_2msl;
|
||||
};
|
||||
|
||||
static uma_zone_t tcpcb_zone;
|
||||
static uma_zone_t tcptw_zone;
|
||||
|
||||
/*
|
||||
* Tcp initialization
|
||||
@ -244,7 +242,7 @@ tcp_init()
|
||||
tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
|
||||
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
|
||||
&tcbinfo.porthashmask);
|
||||
tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp),
|
||||
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
|
||||
#ifdef INET6
|
||||
@ -257,6 +255,15 @@ tcp_init()
|
||||
if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
|
||||
panic("tcp_init");
|
||||
#undef TCP_MINPROTOHDR
|
||||
/*
|
||||
* These have to be type stable for the benefit of the timers.
|
||||
*/
|
||||
tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcpcb_zone, maxsockets);
|
||||
tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcptw_zone, maxsockets);
|
||||
|
||||
syncache_init();
|
||||
}
|
||||
@ -552,16 +559,17 @@ struct tcpcb *
|
||||
tcp_newtcpcb(inp)
|
||||
struct inpcb *inp;
|
||||
{
|
||||
struct inp_tp *it;
|
||||
register struct tcpcb *tp;
|
||||
struct tcpcb_mem *tm;
|
||||
struct tcpcb *tp;
|
||||
#ifdef INET6
|
||||
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
|
||||
#endif /* INET6 */
|
||||
|
||||
it = (struct inp_tp *)inp;
|
||||
tp = &it->tcb;
|
||||
bzero((char *) tp, sizeof(struct tcpcb));
|
||||
LIST_INIT(&tp->t_segq);
|
||||
tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
|
||||
if (tm == NULL)
|
||||
return (NULL);
|
||||
tp = &tm->tcb;
|
||||
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
|
||||
tp->t_maxseg = tp->t_maxopd =
|
||||
#ifdef INET6
|
||||
isipv6 ? tcp_v6mssdflt :
|
||||
@ -569,11 +577,11 @@ tcp_newtcpcb(inp)
|
||||
tcp_mssdflt;
|
||||
|
||||
/* Set up our timeouts. */
|
||||
callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0);
|
||||
callout_init(tp->tt_persist = &it->inp_tp_persist, 0);
|
||||
callout_init(tp->tt_keep = &it->inp_tp_keep, 0);
|
||||
callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0);
|
||||
callout_init(tp->tt_delack = &it->inp_tp_delack, 0);
|
||||
callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0);
|
||||
callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0);
|
||||
callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0);
|
||||
callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0);
|
||||
callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0);
|
||||
|
||||
if (tcp_do_rfc1323)
|
||||
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
|
||||
@ -628,23 +636,17 @@ tcp_drop(tp, errno)
|
||||
return (tcp_close(tp));
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a TCP control block:
|
||||
* discard all space held by the tcp
|
||||
* discard internet protocol block
|
||||
* wake up any sleepers
|
||||
*/
|
||||
struct tcpcb *
|
||||
tcp_close(tp)
|
||||
register struct tcpcb *tp;
|
||||
static void
|
||||
tcp_discardcb(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
register struct tseg_qent *q;
|
||||
struct tseg_qent *q;
|
||||
struct inpcb *inp = tp->t_inpcb;
|
||||
struct socket *so = inp->inp_socket;
|
||||
#ifdef INET6
|
||||
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
|
||||
#endif /* INET6 */
|
||||
register struct rtentry *rt;
|
||||
struct rtentry *rt;
|
||||
int dosavessthresh;
|
||||
|
||||
/*
|
||||
@ -762,20 +764,37 @@ tcp_close(tp)
|
||||
}
|
||||
no_valid_rt:
|
||||
/* free the reassembly queue, if any */
|
||||
while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
|
||||
while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
|
||||
LIST_REMOVE(q, tqe_q);
|
||||
m_freem(q->tqe_m);
|
||||
FREE(q, M_TSEGQ);
|
||||
}
|
||||
inp->inp_ppcb = NULL;
|
||||
tp->t_inpcb = NULL;
|
||||
uma_zfree(tcpcb_zone, tp);
|
||||
soisdisconnected(so);
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a TCP control block:
|
||||
* discard all space held by the tcp
|
||||
* discard internet protocol block
|
||||
* wake up any sleepers
|
||||
*/
|
||||
struct tcpcb *
|
||||
tcp_close(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
struct inpcb *inp = tp->t_inpcb;
|
||||
struct socket *so = inp->inp_socket;
|
||||
|
||||
tcp_discardcb(tp);
|
||||
#ifdef INET6
|
||||
if (INP_CHECK_SOCKAF(so, AF_INET6))
|
||||
in6_pcbdetach(inp);
|
||||
else
|
||||
#endif /* INET6 */
|
||||
in_pcbdetach(inp);
|
||||
#endif
|
||||
in_pcbdetach(inp);
|
||||
tcpstat.tcps_closed++;
|
||||
return ((struct tcpcb *)0);
|
||||
}
|
||||
@ -799,6 +818,8 @@ tcp_drain()
|
||||
*/
|
||||
INP_INFO_RLOCK(&tcbinfo);
|
||||
LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
|
||||
if (inpb->inp_vflag & INP_TIMEWAIT)
|
||||
continue;
|
||||
INP_LOCK(inpb);
|
||||
if ((tcpb = intotcpcb(inpb))) {
|
||||
while ((te = LIST_FIRST(&tcpb->t_segq))
|
||||
@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
inp = LIST_NEXT(inp, inp_list)) {
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt &&
|
||||
cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
|
||||
(((inp->inp_vflag & INP_TIMEWAIT) &&
|
||||
cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) ||
|
||||
cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0))
|
||||
inp_list[i++] = inp;
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
/* XXX should avoid extra copy */
|
||||
bcopy(inp, &xt.xt_inp, sizeof *inp);
|
||||
inp_ppcb = inp->inp_ppcb;
|
||||
if (inp_ppcb != NULL)
|
||||
bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
|
||||
else
|
||||
if (inp_ppcb == NULL)
|
||||
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
|
||||
else if (inp->inp_vflag & INP_TIMEWAIT) {
|
||||
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
|
||||
xt.xt_tp.t_state = TCPS_TIME_WAIT;
|
||||
} else
|
||||
bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
|
||||
if (inp->inp_socket)
|
||||
sotoxsocket(inp->inp_socket, &xt.xt_socket);
|
||||
else {
|
||||
bzero(&xt.xt_socket, sizeof xt.xt_socket);
|
||||
xt.xt_socket.xso_protocol = IPPROTO_TCP;
|
||||
}
|
||||
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
|
||||
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
||||
}
|
||||
@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp)
|
||||
struct ip *ip;
|
||||
#ifdef INET6
|
||||
struct ip6_hdr *ip6;
|
||||
#endif /* INET6 */
|
||||
#endif
|
||||
struct tcphdr *th;
|
||||
|
||||
if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
|
||||
@ -1558,6 +1588,196 @@ tcp_cleartaocache()
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a TCP connection into TIME_WAIT state.
|
||||
* tcbinfo is unlocked.
|
||||
* inp is locked, and is unlocked before returning.
|
||||
*/
|
||||
void
|
||||
tcp_twstart(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
struct tcptw_mem *tm;
|
||||
struct tcptw *tw;
|
||||
struct inpcb *inp;
|
||||
int tw_time, acknow;
|
||||
struct socket *so;
|
||||
|
||||
tm = uma_zalloc(tcptw_zone, M_NOWAIT);
|
||||
if (tm == NULL)
|
||||
/* EEEK! -- preserve old structure or just kill everything? */
|
||||
/* must obtain tcbinfo lock in order to drop the structure. */
|
||||
panic("uma_zalloc(tcptw)");
|
||||
tw = &tm->tw;
|
||||
inp = tp->t_inpcb;
|
||||
tw->tw_inpcb = inp;
|
||||
|
||||
/*
|
||||
* Recover last window size sent.
|
||||
*/
|
||||
tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
|
||||
|
||||
/*
|
||||
* Set t_recent if timestamps are used on the connection.
|
||||
*/
|
||||
if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
|
||||
(TF_REQ_TSTMP|TF_RCVD_TSTMP))
|
||||
tw->t_recent = tp->ts_recent;
|
||||
else
|
||||
tw->t_recent = 0;
|
||||
|
||||
tw->snd_nxt = tp->snd_nxt;
|
||||
tw->rcv_nxt = tp->rcv_nxt;
|
||||
tw->cc_recv = tp->cc_recv;
|
||||
tw->cc_send = tp->cc_send;
|
||||
tw->t_starttime = tp->t_starttime;
|
||||
callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0);
|
||||
|
||||
/* XXX
|
||||
* If this code will
|
||||
* be used for fin-wait-2 state also, then we may need
|
||||
* a ts_recent from the last segment.
|
||||
*/
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) {
|
||||
tw_time = tp->t_rxtcur * TCPTV_TWTRUNC;
|
||||
/* For T/TCP client, force ACK now. */
|
||||
acknow = 1;
|
||||
} else {
|
||||
tw_time = 2 * tcp_msl;
|
||||
acknow = tp->t_flags & TF_ACKNOW;
|
||||
}
|
||||
tcp_discardcb(tp);
|
||||
so = inp->inp_socket;
|
||||
so->so_pcb = NULL;
|
||||
tw->tw_cred = crhold(so->so_cred);
|
||||
tw->tw_so_options = so->so_options;
|
||||
sotryfree(so);
|
||||
inp->inp_socket = NULL;
|
||||
inp->inp_ppcb = (caddr_t)tw;
|
||||
inp->inp_vflag |= INP_TIMEWAIT;
|
||||
callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw);
|
||||
if (acknow)
|
||||
tcp_twrespond(tw, TH_ACK);
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_twclose(tw)
|
||||
struct tcptw *tw;
|
||||
{
|
||||
struct inpcb *inp;
|
||||
|
||||
inp = tw->tw_inpcb;
|
||||
tw->tw_inpcb = NULL;
|
||||
callout_stop(tw->tt_2msl);
|
||||
inp->inp_ppcb = NULL;
|
||||
uma_zfree(tcptw_zone, tw);
|
||||
#ifdef INET6
|
||||
if (inp->inp_vflag & INP_IPV6PROTO)
|
||||
in6_pcbdetach(inp);
|
||||
else
|
||||
#endif
|
||||
in_pcbdetach(inp);
|
||||
tcpstat.tcps_closed++;
|
||||
}
|
||||
|
||||
int
|
||||
tcp_twrespond(struct tcptw *tw, int flags)
|
||||
{
|
||||
struct inpcb *inp = tw->tw_inpcb;
|
||||
struct tcphdr *th;
|
||||
struct mbuf *m;
|
||||
struct ip *ip = NULL;
|
||||
u_int8_t *optp;
|
||||
u_int hdrlen, optlen;
|
||||
int error;
|
||||
#ifdef INET6
|
||||
struct ip6_hdr *ip6 = NULL;
|
||||
int isipv6 = inp->inp_inc.inc_isipv6;
|
||||
#else
|
||||
const int isipv6 = 0;
|
||||
#endif
|
||||
|
||||
m = m_gethdr(M_NOWAIT, MT_HEADER);
|
||||
if (m == NULL)
|
||||
return (ENOBUFS);
|
||||
m->m_data += max_linkhdr;
|
||||
|
||||
if (isipv6) {
|
||||
hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
|
||||
ip6 = mtod(m, struct ip6_hdr *);
|
||||
th = (struct tcphdr *)(ip6 + 1);
|
||||
tcpip_fillheaders(inp, ip6, th);
|
||||
} else {
|
||||
hdrlen = sizeof(struct tcpiphdr);
|
||||
ip = mtod(m, struct ip *);
|
||||
th = (struct tcphdr *)(ip + 1);
|
||||
tcpip_fillheaders(inp, ip, th);
|
||||
}
|
||||
optp = (u_int8_t *)(th + 1);
|
||||
|
||||
/*
|
||||
* Send a timestamp and echo-reply if both our side and our peer
|
||||
* have sent timestamps in our SYN's and this is not a RST.
|
||||
*/
|
||||
if (tw->t_recent && flags == TH_ACK) {
|
||||
u_int32_t *lp = (u_int32_t *)optp;
|
||||
|
||||
/* Form timestamp option as shown in appendix A of RFC 1323. */
|
||||
*lp++ = htonl(TCPOPT_TSTAMP_HDR);
|
||||
*lp++ = htonl(ticks);
|
||||
*lp = htonl(tw->t_recent);
|
||||
optp += TCPOLEN_TSTAMP_APPA;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send `CC-family' options if needed, and it's not a RST.
|
||||
*/
|
||||
if (tw->cc_recv != 0 && flags == TH_ACK) {
|
||||
u_int32_t *lp = (u_int32_t *)optp;
|
||||
|
||||
*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
|
||||
*lp = htonl(tw->cc_send);
|
||||
optp += TCPOLEN_CC_APPA;
|
||||
}
|
||||
optlen = optp - (u_int8_t *)(th + 1);
|
||||
|
||||
m->m_len = hdrlen + optlen;
|
||||
m->m_pkthdr.len = m->m_len;
|
||||
|
||||
KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
|
||||
|
||||
th->th_seq = htonl(tw->snd_nxt);
|
||||
th->th_ack = htonl(tw->rcv_nxt);
|
||||
th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
|
||||
th->th_flags = flags;
|
||||
th->th_win = htons(tw->last_win);
|
||||
|
||||
if (isipv6) {
|
||||
th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
|
||||
sizeof(struct tcphdr) + optlen);
|
||||
ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
|
||||
inp->in6p_route.ro_rt->rt_ifp : NULL);
|
||||
error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
|
||||
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
|
||||
} else {
|
||||
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
|
||||
m->m_pkthdr.csum_flags = CSUM_TCP;
|
||||
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
|
||||
ip->ip_len = m->m_pkthdr.len;
|
||||
error = ip_output(m, inp->inp_options, &inp->inp_route,
|
||||
(tw->tw_so_options & SO_DONTROUTE), NULL, inp);
|
||||
}
|
||||
if (flags & TH_ACK)
|
||||
tcpstat.tcps_sndacks++;
|
||||
else
|
||||
tcpstat.tcps_sndctrl++;
|
||||
tcpstat.tcps_sndtotal++;
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
|
||||
*
|
||||
|
@ -251,6 +251,33 @@ tcp_timer_2msl(xtp)
|
||||
splx(s);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_2msl_tw(xtw)
|
||||
void *xtw;
|
||||
{
|
||||
struct tcptw *tw = xtw;
|
||||
int s;
|
||||
|
||||
s = splnet();
|
||||
INP_INFO_WLOCK(&tcbinfo);
|
||||
if (tw->tw_inpcb == NULL) {
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
INP_LOCK(tw->tw_inpcb);
|
||||
if (callout_pending(tw->tt_2msl) || !callout_active(tw->tt_2msl)) {
|
||||
INP_UNLOCK(tw->tw_inpcb);
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
callout_deactivate(tw->tt_2msl);
|
||||
tcp_twclose(tw);
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
splx(s);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_keep(xtp)
|
||||
void *xtp;
|
||||
|
@ -152,6 +152,7 @@ extern int tcp_ttl; /* time to live for TCP segs */
|
||||
extern int tcp_backoff[];
|
||||
|
||||
void tcp_timer_2msl(void *xtp);
|
||||
void tcp_timer_2msl_tw(void *xtw); /* XXX temporary */
|
||||
void tcp_timer_keep(void *xtp);
|
||||
void tcp_timer_persist(void *xtp);
|
||||
void tcp_timer_rexmt(void *xtp);
|
||||
|
@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
|
||||
|
||||
static void tcp_cleartaocache(void);
|
||||
static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
static void tcp_discardcb(struct tcpcb *);
|
||||
|
||||
/*
|
||||
* Target size of TCP PCB hash tables. Must be a power of two.
|
||||
@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the actual shape of what we allocate using the zone
|
||||
* allocator. Doing it this way allows us to protect both structures
|
||||
* using the same generation count, and also eliminates the overhead
|
||||
* of allocating tcpcbs separately. By hiding the structure here,
|
||||
* we avoid changing most of the rest of the code (although it needs
|
||||
* to be changed, eventually, for greater efficiency).
|
||||
* XXX
|
||||
* Callouts should be moved into struct tcp directly. They are currently
|
||||
* separate because the tcpcb structure is exported to userland for sysctl
|
||||
* parsing purposes, which do not know about callouts.
|
||||
*/
|
||||
#define ALIGNMENT 32
|
||||
#define ALIGNM1 (ALIGNMENT - 1)
|
||||
struct inp_tp {
|
||||
union {
|
||||
struct inpcb inp;
|
||||
char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
|
||||
} inp_tp_u;
|
||||
struct tcpcb_mem {
|
||||
struct tcpcb tcb;
|
||||
struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
|
||||
struct callout inp_tp_delack;
|
||||
struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
|
||||
struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
|
||||
};
|
||||
#undef ALIGNMENT
|
||||
#undef ALIGNM1
|
||||
struct tcptw_mem {
|
||||
struct tcptw tw;
|
||||
struct callout tcptw_mem_2msl;
|
||||
};
|
||||
|
||||
static uma_zone_t tcpcb_zone;
|
||||
static uma_zone_t tcptw_zone;
|
||||
|
||||
/*
|
||||
* Tcp initialization
|
||||
@ -244,7 +242,7 @@ tcp_init()
|
||||
tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
|
||||
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
|
||||
&tcbinfo.porthashmask);
|
||||
tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp),
|
||||
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
|
||||
#ifdef INET6
|
||||
@ -257,6 +255,15 @@ tcp_init()
|
||||
if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
|
||||
panic("tcp_init");
|
||||
#undef TCP_MINPROTOHDR
|
||||
/*
|
||||
* These have to be type stable for the benefit of the timers.
|
||||
*/
|
||||
tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcpcb_zone, maxsockets);
|
||||
tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcptw_zone, maxsockets);
|
||||
|
||||
syncache_init();
|
||||
}
|
||||
@ -552,16 +559,17 @@ struct tcpcb *
|
||||
tcp_newtcpcb(inp)
|
||||
struct inpcb *inp;
|
||||
{
|
||||
struct inp_tp *it;
|
||||
register struct tcpcb *tp;
|
||||
struct tcpcb_mem *tm;
|
||||
struct tcpcb *tp;
|
||||
#ifdef INET6
|
||||
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
|
||||
#endif /* INET6 */
|
||||
|
||||
it = (struct inp_tp *)inp;
|
||||
tp = &it->tcb;
|
||||
bzero((char *) tp, sizeof(struct tcpcb));
|
||||
LIST_INIT(&tp->t_segq);
|
||||
tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
|
||||
if (tm == NULL)
|
||||
return (NULL);
|
||||
tp = &tm->tcb;
|
||||
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
|
||||
tp->t_maxseg = tp->t_maxopd =
|
||||
#ifdef INET6
|
||||
isipv6 ? tcp_v6mssdflt :
|
||||
@ -569,11 +577,11 @@ tcp_newtcpcb(inp)
|
||||
tcp_mssdflt;
|
||||
|
||||
/* Set up our timeouts. */
|
||||
callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0);
|
||||
callout_init(tp->tt_persist = &it->inp_tp_persist, 0);
|
||||
callout_init(tp->tt_keep = &it->inp_tp_keep, 0);
|
||||
callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0);
|
||||
callout_init(tp->tt_delack = &it->inp_tp_delack, 0);
|
||||
callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0);
|
||||
callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0);
|
||||
callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0);
|
||||
callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0);
|
||||
callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0);
|
||||
|
||||
if (tcp_do_rfc1323)
|
||||
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
|
||||
@ -628,23 +636,17 @@ tcp_drop(tp, errno)
|
||||
return (tcp_close(tp));
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a TCP control block:
|
||||
* discard all space held by the tcp
|
||||
* discard internet protocol block
|
||||
* wake up any sleepers
|
||||
*/
|
||||
struct tcpcb *
|
||||
tcp_close(tp)
|
||||
register struct tcpcb *tp;
|
||||
static void
|
||||
tcp_discardcb(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
register struct tseg_qent *q;
|
||||
struct tseg_qent *q;
|
||||
struct inpcb *inp = tp->t_inpcb;
|
||||
struct socket *so = inp->inp_socket;
|
||||
#ifdef INET6
|
||||
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
|
||||
#endif /* INET6 */
|
||||
register struct rtentry *rt;
|
||||
struct rtentry *rt;
|
||||
int dosavessthresh;
|
||||
|
||||
/*
|
||||
@ -762,20 +764,37 @@ tcp_close(tp)
|
||||
}
|
||||
no_valid_rt:
|
||||
/* free the reassembly queue, if any */
|
||||
while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
|
||||
while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
|
||||
LIST_REMOVE(q, tqe_q);
|
||||
m_freem(q->tqe_m);
|
||||
FREE(q, M_TSEGQ);
|
||||
}
|
||||
inp->inp_ppcb = NULL;
|
||||
tp->t_inpcb = NULL;
|
||||
uma_zfree(tcpcb_zone, tp);
|
||||
soisdisconnected(so);
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a TCP control block:
|
||||
* discard all space held by the tcp
|
||||
* discard internet protocol block
|
||||
* wake up any sleepers
|
||||
*/
|
||||
struct tcpcb *
|
||||
tcp_close(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
struct inpcb *inp = tp->t_inpcb;
|
||||
struct socket *so = inp->inp_socket;
|
||||
|
||||
tcp_discardcb(tp);
|
||||
#ifdef INET6
|
||||
if (INP_CHECK_SOCKAF(so, AF_INET6))
|
||||
in6_pcbdetach(inp);
|
||||
else
|
||||
#endif /* INET6 */
|
||||
in_pcbdetach(inp);
|
||||
#endif
|
||||
in_pcbdetach(inp);
|
||||
tcpstat.tcps_closed++;
|
||||
return ((struct tcpcb *)0);
|
||||
}
|
||||
@ -799,6 +818,8 @@ tcp_drain()
|
||||
*/
|
||||
INP_INFO_RLOCK(&tcbinfo);
|
||||
LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
|
||||
if (inpb->inp_vflag & INP_TIMEWAIT)
|
||||
continue;
|
||||
INP_LOCK(inpb);
|
||||
if ((tcpb = intotcpcb(inpb))) {
|
||||
while ((te = LIST_FIRST(&tcpb->t_segq))
|
||||
@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
inp = LIST_NEXT(inp, inp_list)) {
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt &&
|
||||
cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
|
||||
(((inp->inp_vflag & INP_TIMEWAIT) &&
|
||||
cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) ||
|
||||
cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0))
|
||||
inp_list[i++] = inp;
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
/* XXX should avoid extra copy */
|
||||
bcopy(inp, &xt.xt_inp, sizeof *inp);
|
||||
inp_ppcb = inp->inp_ppcb;
|
||||
if (inp_ppcb != NULL)
|
||||
bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
|
||||
else
|
||||
if (inp_ppcb == NULL)
|
||||
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
|
||||
else if (inp->inp_vflag & INP_TIMEWAIT) {
|
||||
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
|
||||
xt.xt_tp.t_state = TCPS_TIME_WAIT;
|
||||
} else
|
||||
bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
|
||||
if (inp->inp_socket)
|
||||
sotoxsocket(inp->inp_socket, &xt.xt_socket);
|
||||
else {
|
||||
bzero(&xt.xt_socket, sizeof xt.xt_socket);
|
||||
xt.xt_socket.xso_protocol = IPPROTO_TCP;
|
||||
}
|
||||
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
|
||||
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
||||
}
|
||||
@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp)
|
||||
struct ip *ip;
|
||||
#ifdef INET6
|
||||
struct ip6_hdr *ip6;
|
||||
#endif /* INET6 */
|
||||
#endif
|
||||
struct tcphdr *th;
|
||||
|
||||
if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
|
||||
@ -1558,6 +1588,196 @@ tcp_cleartaocache()
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a TCP connection into TIME_WAIT state.
|
||||
* tcbinfo is unlocked.
|
||||
* inp is locked, and is unlocked before returning.
|
||||
*/
|
||||
void
|
||||
tcp_twstart(tp)
|
||||
struct tcpcb *tp;
|
||||
{
|
||||
struct tcptw_mem *tm;
|
||||
struct tcptw *tw;
|
||||
struct inpcb *inp;
|
||||
int tw_time, acknow;
|
||||
struct socket *so;
|
||||
|
||||
tm = uma_zalloc(tcptw_zone, M_NOWAIT);
|
||||
if (tm == NULL)
|
||||
/* EEEK! -- preserve old structure or just kill everything? */
|
||||
/* must obtain tcbinfo lock in order to drop the structure. */
|
||||
panic("uma_zalloc(tcptw)");
|
||||
tw = &tm->tw;
|
||||
inp = tp->t_inpcb;
|
||||
tw->tw_inpcb = inp;
|
||||
|
||||
/*
|
||||
* Recover last window size sent.
|
||||
*/
|
||||
tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
|
||||
|
||||
/*
|
||||
* Set t_recent if timestamps are used on the connection.
|
||||
*/
|
||||
if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
|
||||
(TF_REQ_TSTMP|TF_RCVD_TSTMP))
|
||||
tw->t_recent = tp->ts_recent;
|
||||
else
|
||||
tw->t_recent = 0;
|
||||
|
||||
tw->snd_nxt = tp->snd_nxt;
|
||||
tw->rcv_nxt = tp->rcv_nxt;
|
||||
tw->cc_recv = tp->cc_recv;
|
||||
tw->cc_send = tp->cc_send;
|
||||
tw->t_starttime = tp->t_starttime;
|
||||
callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0);
|
||||
|
||||
/* XXX
|
||||
* If this code will
|
||||
* be used for fin-wait-2 state also, then we may need
|
||||
* a ts_recent from the last segment.
|
||||
*/
|
||||
/* Shorten TIME_WAIT [RFC-1644, p.28] */
|
||||
if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) {
|
||||
tw_time = tp->t_rxtcur * TCPTV_TWTRUNC;
|
||||
/* For T/TCP client, force ACK now. */
|
||||
acknow = 1;
|
||||
} else {
|
||||
tw_time = 2 * tcp_msl;
|
||||
acknow = tp->t_flags & TF_ACKNOW;
|
||||
}
|
||||
tcp_discardcb(tp);
|
||||
so = inp->inp_socket;
|
||||
so->so_pcb = NULL;
|
||||
tw->tw_cred = crhold(so->so_cred);
|
||||
tw->tw_so_options = so->so_options;
|
||||
sotryfree(so);
|
||||
inp->inp_socket = NULL;
|
||||
inp->inp_ppcb = (caddr_t)tw;
|
||||
inp->inp_vflag |= INP_TIMEWAIT;
|
||||
callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw);
|
||||
if (acknow)
|
||||
tcp_twrespond(tw, TH_ACK);
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_twclose(tw)
|
||||
struct tcptw *tw;
|
||||
{
|
||||
struct inpcb *inp;
|
||||
|
||||
inp = tw->tw_inpcb;
|
||||
tw->tw_inpcb = NULL;
|
||||
callout_stop(tw->tt_2msl);
|
||||
inp->inp_ppcb = NULL;
|
||||
uma_zfree(tcptw_zone, tw);
|
||||
#ifdef INET6
|
||||
if (inp->inp_vflag & INP_IPV6PROTO)
|
||||
in6_pcbdetach(inp);
|
||||
else
|
||||
#endif
|
||||
in_pcbdetach(inp);
|
||||
tcpstat.tcps_closed++;
|
||||
}
|
||||
|
||||
int
|
||||
tcp_twrespond(struct tcptw *tw, int flags)
|
||||
{
|
||||
struct inpcb *inp = tw->tw_inpcb;
|
||||
struct tcphdr *th;
|
||||
struct mbuf *m;
|
||||
struct ip *ip = NULL;
|
||||
u_int8_t *optp;
|
||||
u_int hdrlen, optlen;
|
||||
int error;
|
||||
#ifdef INET6
|
||||
struct ip6_hdr *ip6 = NULL;
|
||||
int isipv6 = inp->inp_inc.inc_isipv6;
|
||||
#else
|
||||
const int isipv6 = 0;
|
||||
#endif
|
||||
|
||||
m = m_gethdr(M_NOWAIT, MT_HEADER);
|
||||
if (m == NULL)
|
||||
return (ENOBUFS);
|
||||
m->m_data += max_linkhdr;
|
||||
|
||||
if (isipv6) {
|
||||
hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
|
||||
ip6 = mtod(m, struct ip6_hdr *);
|
||||
th = (struct tcphdr *)(ip6 + 1);
|
||||
tcpip_fillheaders(inp, ip6, th);
|
||||
} else {
|
||||
hdrlen = sizeof(struct tcpiphdr);
|
||||
ip = mtod(m, struct ip *);
|
||||
th = (struct tcphdr *)(ip + 1);
|
||||
tcpip_fillheaders(inp, ip, th);
|
||||
}
|
||||
optp = (u_int8_t *)(th + 1);
|
||||
|
||||
/*
|
||||
* Send a timestamp and echo-reply if both our side and our peer
|
||||
* have sent timestamps in our SYN's and this is not a RST.
|
||||
*/
|
||||
if (tw->t_recent && flags == TH_ACK) {
|
||||
u_int32_t *lp = (u_int32_t *)optp;
|
||||
|
||||
/* Form timestamp option as shown in appendix A of RFC 1323. */
|
||||
*lp++ = htonl(TCPOPT_TSTAMP_HDR);
|
||||
*lp++ = htonl(ticks);
|
||||
*lp = htonl(tw->t_recent);
|
||||
optp += TCPOLEN_TSTAMP_APPA;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send `CC-family' options if needed, and it's not a RST.
|
||||
*/
|
||||
if (tw->cc_recv != 0 && flags == TH_ACK) {
|
||||
u_int32_t *lp = (u_int32_t *)optp;
|
||||
|
||||
*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
|
||||
*lp = htonl(tw->cc_send);
|
||||
optp += TCPOLEN_CC_APPA;
|
||||
}
|
||||
optlen = optp - (u_int8_t *)(th + 1);
|
||||
|
||||
m->m_len = hdrlen + optlen;
|
||||
m->m_pkthdr.len = m->m_len;
|
||||
|
||||
KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
|
||||
|
||||
th->th_seq = htonl(tw->snd_nxt);
|
||||
th->th_ack = htonl(tw->rcv_nxt);
|
||||
th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
|
||||
th->th_flags = flags;
|
||||
th->th_win = htons(tw->last_win);
|
||||
|
||||
if (isipv6) {
|
||||
th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
|
||||
sizeof(struct tcphdr) + optlen);
|
||||
ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
|
||||
inp->in6p_route.ro_rt->rt_ifp : NULL);
|
||||
error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
|
||||
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
|
||||
} else {
|
||||
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
|
||||
m->m_pkthdr.csum_flags = CSUM_TCP;
|
||||
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
|
||||
ip->ip_len = m->m_pkthdr.len;
|
||||
error = ip_output(m, inp->inp_options, &inp->inp_route,
|
||||
(tw->tw_so_options & SO_DONTROUTE), NULL, inp);
|
||||
}
|
||||
if (flags & TH_ACK)
|
||||
tcpstat.tcps_sndacks++;
|
||||
else
|
||||
tcpstat.tcps_sndctrl++;
|
||||
tcpstat.tcps_sndtotal++;
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
|
||||
*
|
||||
|
@ -235,6 +235,20 @@ struct syncache_head {
|
||||
TAILQ_HEAD(, syncache) sch_bucket;
|
||||
u_int sch_length;
|
||||
};
|
||||
|
||||
struct tcptw {
|
||||
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
|
||||
tcp_seq snd_nxt;
|
||||
tcp_seq rcv_nxt;
|
||||
tcp_cc cc_recv;
|
||||
tcp_cc cc_send;
|
||||
u_short last_win; /* cached window value */
|
||||
u_short tw_so_options; /* copy of so_options */
|
||||
struct ucred *tw_cred; /* user credentials */
|
||||
u_long t_recent;
|
||||
u_long t_starttime;
|
||||
struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */
|
||||
};
|
||||
|
||||
/*
|
||||
* The TAO cache entry which is stored in the protocol family specific
|
||||
@ -254,6 +268,7 @@ struct rmxp_tao {
|
||||
#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler)
|
||||
|
||||
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
|
||||
#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb)
|
||||
#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
|
||||
|
||||
/*
|
||||
@ -448,6 +463,8 @@ extern int ss_fltsz_local;
|
||||
void tcp_canceltimers(struct tcpcb *);
|
||||
struct tcpcb *
|
||||
tcp_close(struct tcpcb *);
|
||||
void tcp_twstart(struct tcpcb *);
|
||||
void tcp_twclose(struct tcptw *);
|
||||
void tcp_ctlinput(int, struct sockaddr *, void *);
|
||||
int tcp_ctloutput(struct socket *, struct sockopt *);
|
||||
struct tcpcb *
|
||||
@ -471,6 +488,7 @@ struct inpcb *
|
||||
tcp_quench(struct inpcb *, int);
|
||||
void tcp_respond(struct tcpcb *, void *,
|
||||
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
|
||||
int tcp_twrespond(struct tcptw *, int);
|
||||
struct rtentry *
|
||||
tcp_rtlookup(struct in_conninfo *);
|
||||
void tcp_setpersist(struct tcpcb *);
|
||||
|
@ -614,9 +614,10 @@ in6_pcbdetach(inp)
|
||||
#endif /* IPSEC */
|
||||
inp->inp_gencnt = ++ipi->ipi_gencnt;
|
||||
in_pcbremlists(inp);
|
||||
sotoinpcb(so) = 0;
|
||||
sotryfree(so);
|
||||
|
||||
if (so) {
|
||||
so->so_pcb = NULL;
|
||||
sotryfree(so);
|
||||
}
|
||||
if (inp->in6p_options)
|
||||
m_freem(inp->in6p_options);
|
||||
ip6_freepcbopts(inp->in6p_outputopts);
|
||||
@ -627,7 +628,6 @@ in6_pcbdetach(inp)
|
||||
if (inp->inp_options)
|
||||
(void)m_free(inp->inp_options);
|
||||
ip_freemoptions(inp->inp_moptions);
|
||||
|
||||
inp->inp_vflag = 0;
|
||||
INP_LOCK_DESTROY(inp);
|
||||
uma_zfree(ipi->ipi_zone, inp);
|
||||
|
@ -218,10 +218,10 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
|
||||
|
||||
#ifdef IPSEC
|
||||
/* get a security policy for this packet */
|
||||
if (so == NULL)
|
||||
if (inp == NULL)
|
||||
sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
|
||||
else
|
||||
sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
|
||||
sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
|
||||
|
||||
if (sp == NULL) {
|
||||
ipsec6stat.out_inval++;
|
||||
|
@ -235,10 +235,10 @@ static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *));
|
||||
* NOTE: IPv6 mapped address concern is implemented here.
|
||||
*/
|
||||
struct secpolicy *
|
||||
ipsec4_getpolicybysock(m, dir, so, error)
|
||||
ipsec4_getpolicybypcb(m, dir, inp, error)
|
||||
struct mbuf *m;
|
||||
u_int dir;
|
||||
struct socket *so;
|
||||
struct inpcb *inp;
|
||||
int *error;
|
||||
{
|
||||
struct inpcbpolicy *pcbsp = NULL;
|
||||
@ -246,35 +246,19 @@ ipsec4_getpolicybysock(m, dir, so, error)
|
||||
struct secpolicy *kernsp = NULL; /* policy on kernel */
|
||||
|
||||
/* sanity check */
|
||||
if (m == NULL || so == NULL || error == NULL)
|
||||
if (m == NULL || inp == NULL || error == NULL)
|
||||
panic("ipsec4_getpolicybysock: NULL pointer was passed.\n");
|
||||
|
||||
switch (so->so_proto->pr_domain->dom_family) {
|
||||
case AF_INET:
|
||||
/* set spidx in pcb */
|
||||
*error = ipsec4_setspidx_inpcb(m, sotoinpcb(so));
|
||||
break;
|
||||
/* set spidx in pcb */
|
||||
#ifdef INET6
|
||||
case AF_INET6:
|
||||
/* set spidx in pcb */
|
||||
*error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so));
|
||||
break;
|
||||
if (inp->inp_vflag & INP_IPV6PROTO)
|
||||
*error = ipsec6_setspidx_in6pcb(m, inp);
|
||||
else
|
||||
#endif
|
||||
default:
|
||||
panic("ipsec4_getpolicybysock: unsupported address family\n");
|
||||
}
|
||||
*error = ipsec4_setspidx_inpcb(m, inp);
|
||||
if (*error)
|
||||
return NULL;
|
||||
switch (so->so_proto->pr_domain->dom_family) {
|
||||
case AF_INET:
|
||||
pcbsp = sotoinpcb(so)->inp_sp;
|
||||
break;
|
||||
#ifdef INET6
|
||||
case AF_INET6:
|
||||
pcbsp = sotoin6pcb(so)->in6p_sp;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
pcbsp = inp->inp_sp;
|
||||
|
||||
/* sanity check */
|
||||
if (pcbsp == NULL)
|
||||
@ -390,6 +374,19 @@ ipsec4_getpolicybysock(m, dir, so, error)
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
struct secpolicy *
|
||||
ipsec4_getpolicybysock(m, dir, so, error)
|
||||
struct mbuf *m;
|
||||
u_int dir;
|
||||
struct socket *so;
|
||||
int *error;
|
||||
{
|
||||
|
||||
if (so == NULL)
|
||||
panic("ipsec4_getpolicybysock: NULL pointer was passed.\n");
|
||||
return (ipsec4_getpolicybypcb(m, dir, sotoinpcb(so), error));
|
||||
}
|
||||
|
||||
/*
|
||||
* For FORWARDING packet or OUTBOUND without a socket. Searching SPD for packet,
|
||||
* and return a pointer to SP.
|
||||
@ -462,10 +459,10 @@ ipsec4_getpolicybyaddr(m, dir, flag, error)
|
||||
* others: a pointer to SP
|
||||
*/
|
||||
struct secpolicy *
|
||||
ipsec6_getpolicybysock(m, dir, so, error)
|
||||
ipsec6_getpolicybypcb(m, dir, inp, error)
|
||||
struct mbuf *m;
|
||||
u_int dir;
|
||||
struct socket *so;
|
||||
struct inpcb *inp;
|
||||
int *error;
|
||||
{
|
||||
struct inpcbpolicy *pcbsp = NULL;
|
||||
@ -473,18 +470,17 @@ ipsec6_getpolicybysock(m, dir, so, error)
|
||||
struct secpolicy *kernsp = NULL; /* policy on kernel */
|
||||
|
||||
/* sanity check */
|
||||
if (m == NULL || so == NULL || error == NULL)
|
||||
if (m == NULL || inp == NULL || error == NULL)
|
||||
panic("ipsec6_getpolicybysock: NULL pointer was passed.\n");
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (so->so_proto->pr_domain->dom_family != AF_INET6)
|
||||
if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
|
||||
panic("ipsec6_getpolicybysock: socket domain != inet6\n");
|
||||
#endif
|
||||
|
||||
/* set spidx in pcb */
|
||||
ipsec6_setspidx_in6pcb(m, sotoin6pcb(so));
|
||||
|
||||
pcbsp = sotoin6pcb(so)->in6p_sp;
|
||||
ipsec6_setspidx_in6pcb(m, inp);
|
||||
pcbsp = inp->in6p_sp;
|
||||
|
||||
/* sanity check */
|
||||
if (pcbsp == NULL)
|
||||
@ -601,6 +597,19 @@ ipsec6_getpolicybysock(m, dir, so, error)
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
struct secpolicy *
|
||||
ipsec6_getpolicybysock(m, dir, so, error)
|
||||
struct mbuf *m;
|
||||
u_int dir;
|
||||
struct socket *so;
|
||||
int *error;
|
||||
{
|
||||
|
||||
if (so == NULL)
|
||||
panic("ipsec6_getpolicybysock: NULL pointer was passed.\n");
|
||||
return (ipsec6_getpolicybypcb(m, dir, sotoin6pcb(so), error));
|
||||
}
|
||||
|
||||
/*
|
||||
* For FORWARDING packet or OUTBOUND without a socket. Searching SPD for packet,
|
||||
* and return a pointer to SP.
|
||||
@ -1690,9 +1699,9 @@ ipsec_in_reject(sp, m)
|
||||
* and {ah,esp}4_input for tunnel mode
|
||||
*/
|
||||
int
|
||||
ipsec4_in_reject_so(m, so)
|
||||
ipsec4_in_reject(m, inp)
|
||||
struct mbuf *m;
|
||||
struct socket *so;
|
||||
struct inpcb *inp;
|
||||
{
|
||||
struct secpolicy *sp = NULL;
|
||||
int error;
|
||||
@ -1706,10 +1715,10 @@ ipsec4_in_reject_so(m, so)
|
||||
* When we are called from ip_forward(), we call
|
||||
* ipsec4_getpolicybyaddr() with IP_FORWARDING flag.
|
||||
*/
|
||||
if (so == NULL)
|
||||
if (inp == NULL)
|
||||
sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
|
||||
else
|
||||
sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
|
||||
sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_INBOUND, inp, &error);
|
||||
|
||||
if (sp == NULL)
|
||||
return 0; /* XXX should be panic ?
|
||||
@ -1724,18 +1733,16 @@ ipsec4_in_reject_so(m, so)
|
||||
}
|
||||
|
||||
int
|
||||
ipsec4_in_reject(m, inp)
|
||||
ipsec4_in_reject_so(m, so)
|
||||
struct mbuf *m;
|
||||
struct inpcb *inp;
|
||||
struct socket *so;
|
||||
{
|
||||
if (inp == NULL)
|
||||
return ipsec4_in_reject_so(m, NULL);
|
||||
if (inp->inp_socket)
|
||||
return ipsec4_in_reject_so(m, inp->inp_socket);
|
||||
else
|
||||
panic("ipsec4_in_reject: invalid inpcb/socket");
|
||||
if (so == NULL)
|
||||
return ipsec4_in_reject(m, NULL);
|
||||
return ipsec4_in_reject(m, sotoinpcb(so));
|
||||
}
|
||||
|
||||
|
||||
#ifdef INET6
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
@ -1743,9 +1750,9 @@ ipsec4_in_reject(m, inp)
|
||||
* and {ah,esp}6_input for tunnel mode
|
||||
*/
|
||||
int
|
||||
ipsec6_in_reject_so(m, so)
|
||||
ipsec6_in_reject(m, in6p)
|
||||
struct mbuf *m;
|
||||
struct socket *so;
|
||||
struct in6pcb *in6p;
|
||||
{
|
||||
struct secpolicy *sp = NULL;
|
||||
int error;
|
||||
@ -1759,33 +1766,30 @@ ipsec6_in_reject_so(m, so)
|
||||
* When we are called from ip_forward(), we call
|
||||
* ipsec6_getpolicybyaddr() with IP_FORWARDING flag.
|
||||
*/
|
||||
if (so == NULL)
|
||||
if (in6p == NULL)
|
||||
sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
|
||||
else
|
||||
sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
|
||||
sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_INBOUND, in6p, &error);
|
||||
|
||||
if (sp == NULL)
|
||||
return 0; /* XXX should be panic ? */
|
||||
|
||||
result = ipsec_in_reject(sp, m);
|
||||
KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
|
||||
printf("DP ipsec6_in_reject_so call free SP:%p\n", sp));
|
||||
printf("DP ipsec6_in_reject call free SP:%p\n", sp));
|
||||
key_freesp(sp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
ipsec6_in_reject(m, in6p)
|
||||
ipsec6_in_reject_so(m, so)
|
||||
struct mbuf *m;
|
||||
struct in6pcb *in6p;
|
||||
struct socket *so;
|
||||
{
|
||||
if (in6p == NULL)
|
||||
return ipsec6_in_reject_so(m, NULL);
|
||||
if (in6p->in6p_socket)
|
||||
return ipsec6_in_reject_so(m, in6p->in6p_socket);
|
||||
else
|
||||
panic("ipsec6_in_reject: invalid in6p/socket");
|
||||
if (so == NULL)
|
||||
return ipsec6_in_reject(m, NULL);
|
||||
return ipsec6_in_reject(m, sotoin6pcb(so));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1889,7 +1893,7 @@ ipsec4_hdrsiz(m, dir, inp)
|
||||
if (inp == NULL)
|
||||
sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
|
||||
else
|
||||
sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error);
|
||||
sp = ipsec4_getpolicybypcb(m, dir, inp, &error);
|
||||
|
||||
if (sp == NULL)
|
||||
return 0; /* XXX should be panic ? */
|
||||
@ -1921,15 +1925,18 @@ ipsec6_hdrsiz(m, dir, in6p)
|
||||
/* sanity check */
|
||||
if (m == NULL)
|
||||
return 0; /* XXX shoud be panic ? */
|
||||
#if 0
|
||||
/* this is possible in TIME_WAIT state */
|
||||
if (in6p != NULL && in6p->in6p_socket == NULL)
|
||||
panic("ipsec6_hdrsize: why is socket NULL but there is PCB.");
|
||||
#endif
|
||||
|
||||
/* get SP for this packet */
|
||||
/* XXX Is it right to call with IP_FORWARDING. */
|
||||
if (in6p == NULL)
|
||||
sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
|
||||
else
|
||||
sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error);
|
||||
sp = ipsec6_getpolicybypcb(m, dir, in6p, &error);
|
||||
|
||||
if (sp == NULL)
|
||||
return 0;
|
||||
|
@ -297,12 +297,14 @@ extern int ip4_esp_randpad;
|
||||
|
||||
#define ipseclog(x) do { if (ipsec_debug) log x; } while (0)
|
||||
|
||||
struct inpcb;
|
||||
extern struct secpolicy *ipsec4_getpolicybypcb
|
||||
__P((struct mbuf *, u_int, struct inpcb *, int *));
|
||||
extern struct secpolicy *ipsec4_getpolicybysock
|
||||
__P((struct mbuf *, u_int, struct socket *, int *));
|
||||
extern struct secpolicy *ipsec4_getpolicybyaddr
|
||||
__P((struct mbuf *, u_int, int, int *));
|
||||
|
||||
struct inpcb;
|
||||
extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **));
|
||||
extern int ipsec_copy_policy
|
||||
__P((struct inpcbpolicy *, struct inpcbpolicy *));
|
||||
|
@ -50,13 +50,14 @@ extern int ip6_ah_net_deflev;
|
||||
extern int ip6_ipsec_ecn;
|
||||
extern int ip6_esp_randpad;
|
||||
|
||||
struct inpcb;
|
||||
extern struct secpolicy *ipsec6_getpolicybypcb
|
||||
__P((struct mbuf *, u_int, struct inpcb *, int *));
|
||||
extern struct secpolicy *ipsec6_getpolicybysock
|
||||
__P((struct mbuf *, u_int, struct socket *, int *));
|
||||
extern struct secpolicy *ipsec6_getpolicybyaddr
|
||||
__P((struct mbuf *, u_int, int, int *));
|
||||
|
||||
struct inpcb;
|
||||
|
||||
extern int ipsec6_in_reject_so __P((struct mbuf *, struct socket *));
|
||||
extern int ipsec6_delete_pcbpolicy __P((struct inpcb *));
|
||||
extern int ipsec6_set_policy __P((struct inpcb *inp, int optname,
|
||||
|
@ -180,7 +180,7 @@ rip6_input(mp, offp, proto)
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
*/
|
||||
if (n && ipsec6_in_reject_so(n, last->inp_socket)) {
|
||||
if (n && ipsec6_in_reject(n, last)) {
|
||||
m_freem(n);
|
||||
ipsec6stat.in_polvio++;
|
||||
/* do not inject data into pcb */
|
||||
@ -219,7 +219,7 @@ rip6_input(mp, offp, proto)
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
*/
|
||||
if (last && ipsec6_in_reject_so(m, last->inp_socket)) {
|
||||
if (last && ipsec6_in_reject(m, last)) {
|
||||
m_freem(m);
|
||||
ipsec6stat.in_polvio++;
|
||||
ip6stat.ip6s_delivered--;
|
||||
|
@ -261,7 +261,7 @@ udp6_input(mp, offp, proto)
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
*/
|
||||
if (ipsec6_in_reject_so(m, last->inp_socket))
|
||||
if (ipsec6_in_reject(m, last))
|
||||
ipsec6stat.in_polvio++;
|
||||
/* do not inject data into pcb */
|
||||
else
|
||||
@ -328,7 +328,7 @@ udp6_input(mp, offp, proto)
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
*/
|
||||
if (ipsec6_in_reject_so(m, last->inp_socket)) {
|
||||
if (ipsec6_in_reject(m, last)) {
|
||||
ipsec6stat.in_polvio++;
|
||||
goto bad;
|
||||
}
|
||||
@ -384,7 +384,7 @@ udp6_input(mp, offp, proto)
|
||||
/*
|
||||
* Check AH/ESP integrity.
|
||||
*/
|
||||
if (ipsec6_in_reject_so(m, in6p->in6p_socket)) {
|
||||
if (ipsec6_in_reject(m, in6p)) {
|
||||
ipsec6stat.in_polvio++;
|
||||
goto bad;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user