Add new socket options: TCP_KEEPINIT, TCP_KEEPIDLE, TCP_KEEPINTVL and
TCP_KEEPCNT, which allow the initial connection timeout, the keepalive idle time, the keepalive probe interval and the keepalive probe count to be controlled on a per-socket basis. Reviewed by: andre, bz, lstewart
This commit is contained in:
parent
96baefc0cb
commit
4326beb059
@ -38,7 +38,7 @@
|
||||
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd November 14, 2011
|
||||
.Dd February 5, 2012
|
||||
.Dt TCP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -146,6 +146,65 @@ connection.
|
||||
See
|
||||
.Xr mod_cc 4
|
||||
for details.
|
||||
.It Dv TCP_KEEPINIT
|
||||
This write-only
|
||||
.Xr setsockopt 2
|
||||
option accepts a per-socket timeout argument of
|
||||
.Vt "u_int"
|
||||
in seconds, for new, non-established
|
||||
.Tn TCP
|
||||
connections.
|
||||
For the global default in milliseconds see
|
||||
.Va keepinit
|
||||
in the
|
||||
.Sx MIB Variables
|
||||
section further down.
|
||||
.It Dv TCP_KEEPIDLE
|
||||
This write-only
|
||||
.Xr setsockopt 2
|
||||
option accepts an argument of
|
||||
.Vt "u_int"
|
||||
for the amount of time, in seconds, that the connection must be idle
|
||||
before keepalive probes (if enabled) are sent for the connection of this
|
||||
socket.
|
||||
If set on a listening socket, the value is inherited by the newly created
|
||||
socket upon
|
||||
.Xr accept 2 .
|
||||
For the global default in milliseconds see
|
||||
.Va keepidle
|
||||
in the
|
||||
.Sx MIB Variables
|
||||
section further down.
|
||||
.It Dv TCP_KEEPINTVL
|
||||
This write-only
|
||||
.Xr setsockopt 2
|
||||
option accepts an argument of
|
||||
.Vt "u_int"
|
||||
to set the per-socket interval, in seconds, between keepalive probes sent
|
||||
to a peer.
|
||||
If set on a listening socket, the value is inherited by the newly created
|
||||
socket upon
|
||||
.Xr accept 2 .
|
||||
For the global default in milliseconds see
|
||||
.Va keepintvl
|
||||
in the
|
||||
.Sx MIB Variables
|
||||
section further down.
|
||||
.It Dv TCP_KEEPCNT
|
||||
This write-only
|
||||
.Xr setsockopt 2
|
||||
option accepts an argument of
|
||||
.Vt "u_int"
|
||||
and allows per-socket tuning of the number of probes sent, with no response,
|
||||
before the connection will be dropped.
|
||||
If set on a listening socket, the value is inherited by the newly created
|
||||
socket upon
|
||||
.Xr accept 2 .
|
||||
For the global default see
|
||||
.Va keepcnt
|
||||
in the
|
||||
.Sx MIB Variables
|
||||
section further down.
|
||||
.It Dv TCP_NODELAY
|
||||
Under most circumstances,
|
||||
.Tn TCP
|
||||
@ -296,17 +355,21 @@ The Maximum Segment Lifetime, in milliseconds, for a packet.
|
||||
Timeout, in milliseconds, for new, non-established
|
||||
.Tn TCP
|
||||
connections.
|
||||
The default is 75000 msec.
|
||||
.It Va keepidle
|
||||
Amount of time, in milliseconds, that the connection must be idle
|
||||
before keepalive probes (if enabled) are sent.
|
||||
The default is 7200000 msec (2 hours).
|
||||
.It Va keepintvl
|
||||
The interval, in milliseconds, between keepalive probes sent to remote
|
||||
machines, when no response is received on a
|
||||
.Va keepidle
|
||||
probe.
|
||||
After
|
||||
.Dv TCPTV_KEEPCNT
|
||||
(default 8) probes are sent, with no response, the connection is dropped.
|
||||
The default is 75000 msec.
|
||||
.It Va keepcnt
|
||||
Number of probes sent, with no response, before a connection
|
||||
is dropped.
|
||||
The default is 8 packets.
|
||||
.It Va always_keepalive
|
||||
Assume that
|
||||
.Dv SO_KEEPALIVE
|
||||
|
@ -159,6 +159,10 @@ struct tcphdr {
|
||||
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
|
||||
#define TCP_INFO 0x20 /* retrieve tcp_info structure */
|
||||
#define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */
|
||||
#define TCP_KEEPINIT 0x80 /* N, time to establish connection */
|
||||
#define TCP_KEEPIDLE 0x100 /* L,N,X start keepalives after this period */
|
||||
#define TCP_KEEPINTVL 0x200 /* L,N interval between keepalives */
|
||||
#define TCP_KEEPCNT 0x400 /* L,N number of keepalives before close */
|
||||
|
||||
#define TCP_CA_NAME_MAX 16 /* max congestion control name length */
|
||||
|
||||
|
@ -1446,7 +1446,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
*/
|
||||
tp->t_rcvtime = ticks;
|
||||
if (TCPS_HAVEESTABLISHED(tp->t_state))
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
|
||||
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
|
||||
|
||||
/*
|
||||
* Unscale the window into a 32-bit value.
|
||||
@ -1889,7 +1889,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
} else {
|
||||
tp->t_state = TCPS_ESTABLISHED;
|
||||
cc_conn_init(tp);
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
|
||||
tcp_timer_activate(tp, TT_KEEP,
|
||||
TP_KEEPIDLE(tp));
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
@ -2293,7 +2294,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
} else {
|
||||
tp->t_state = TCPS_ESTABLISHED;
|
||||
cc_conn_init(tp);
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
|
||||
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
|
||||
}
|
||||
/*
|
||||
* If segment contains data or ACK, will call tcp_reass()
|
||||
@ -2630,12 +2631,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* compressed state.
|
||||
*/
|
||||
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
|
||||
int timeout;
|
||||
|
||||
soisdisconnected(so);
|
||||
timeout = (tcp_fast_finwait2_recycle) ?
|
||||
tcp_finwait2_timeout : tcp_maxidle;
|
||||
tcp_timer_activate(tp, TT_2MSL, timeout);
|
||||
tcp_timer_activate(tp, TT_2MSL,
|
||||
(tcp_fast_finwait2_recycle ?
|
||||
tcp_finwait2_timeout :
|
||||
TP_MAXIDLE(tp)));
|
||||
}
|
||||
tp->t_state = TCPS_FIN_WAIT_2;
|
||||
}
|
||||
|
@ -845,7 +845,15 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
|
||||
*/
|
||||
if (sc->sc_rxmits > 1)
|
||||
tp->snd_cwnd = tp->t_maxseg;
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
|
||||
|
||||
/*
|
||||
* Copy and activate timers.
|
||||
*/
|
||||
tp->t_keepinit = sototcpcb(lso)->t_keepinit;
|
||||
tp->t_keepidle = sototcpcb(lso)->t_keepidle;
|
||||
tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
|
||||
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
|
||||
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
|
||||
|
||||
INP_WUNLOCK(inp);
|
||||
|
||||
|
@ -111,12 +111,12 @@ int tcp_finwait2_timeout;
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
|
||||
&tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
|
||||
|
||||
int tcp_keepcnt = TCPTV_KEEPCNT;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
|
||||
"Number of keepalive probes to send");
|
||||
|
||||
static int tcp_keepcnt = TCPTV_KEEPCNT;
|
||||
/* max idle probes */
|
||||
int tcp_maxpersistidle;
|
||||
/* max idle time in persist */
|
||||
int tcp_maxidle;
|
||||
|
||||
static int per_cpu_timers = 0;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
|
||||
@ -138,7 +138,6 @@ tcp_slowtimo(void)
|
||||
VNET_LIST_RLOCK_NOSLEEP();
|
||||
VNET_FOREACH(vnet_iter) {
|
||||
CURVNET_SET(vnet_iter);
|
||||
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
(void) tcp_tw_2msl_scan(0);
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
@ -255,9 +254,9 @@ tcp_timer_2msl(void *xtp)
|
||||
tp = tcp_close(tp);
|
||||
} else {
|
||||
if (tp->t_state != TCPS_TIME_WAIT &&
|
||||
ticks - tp->t_rcvtime <= tcp_maxidle)
|
||||
callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl,
|
||||
tcp_timer_2msl, tp, INP_CPU(inp));
|
||||
ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
|
||||
callout_reset_on(&tp->t_timers->tt_2msl,
|
||||
TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
|
||||
else
|
||||
tp = tcp_close(tp);
|
||||
}
|
||||
@ -318,7 +317,7 @@ tcp_timer_keep(void *xtp)
|
||||
goto dropit;
|
||||
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
|
||||
tp->t_state <= TCPS_CLOSING) {
|
||||
if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
|
||||
if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
|
||||
goto dropit;
|
||||
/*
|
||||
* Send a packet designed to force a response
|
||||
@ -340,9 +339,11 @@ tcp_timer_keep(void *xtp)
|
||||
tp->rcv_nxt, tp->snd_una - 1, 0);
|
||||
free(t_template, M_TEMP);
|
||||
}
|
||||
callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp));
|
||||
callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
|
||||
tcp_timer_keep, tp, INP_CPU(inp));
|
||||
} else
|
||||
callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp));
|
||||
callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
|
||||
tcp_timer_keep, tp, INP_CPU(inp));
|
||||
|
||||
#ifdef TCPDEBUG
|
||||
if (inp->inp_socket->so_options & SO_DEBUG)
|
||||
|
@ -153,10 +153,16 @@ struct tcp_timer {
|
||||
#define TT_KEEP 0x08
|
||||
#define TT_2MSL 0x10
|
||||
|
||||
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit) /* per-connection t_keepinit when non-zero, else global tcp_keepinit */
|
||||
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle) /* per-connection t_keepidle when non-zero, else global tcp_keepidle */
|
||||
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl) /* per-connection t_keepintvl when non-zero, else global tcp_keepintvl */
|
||||
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt) /* per-connection t_keepcnt when non-zero, else global tcp_keepcnt */
|
||||
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) /* effective keepalive count multiplied by effective keepalive interval */
|
||||
|
||||
extern int tcp_keepinit; /* time to establish connection */
|
||||
extern int tcp_keepidle; /* time before keepalive probes begin */
|
||||
extern int tcp_keepintvl; /* time between keepalive probes */
|
||||
extern int tcp_maxidle; /* time to drop after starting probes */
|
||||
extern int tcp_keepcnt; /* number of keepalives */
|
||||
extern int tcp_delacktime; /* time before sending a delayed ACK */
|
||||
extern int tcp_maxpersistidle;
|
||||
extern int tcp_rexmit_min;
|
||||
|
@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/sysctl.h>
|
||||
@ -1118,7 +1119,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
|
||||
soisconnecting(so);
|
||||
TCPSTAT_INC(tcps_connattempt);
|
||||
tp->t_state = TCPS_SYN_SENT;
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
|
||||
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
|
||||
tp->iss = tcp_new_isn(tp);
|
||||
tcp_sendseqinit(tp);
|
||||
|
||||
@ -1191,7 +1192,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
|
||||
soisconnecting(so);
|
||||
TCPSTAT_INC(tcps_connattempt);
|
||||
tp->t_state = TCPS_SYN_SENT;
|
||||
tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
|
||||
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
|
||||
tp->iss = tcp_new_isn(tp);
|
||||
tcp_sendseqinit(tp);
|
||||
|
||||
@ -1272,6 +1273,7 @@ int
|
||||
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
|
||||
{
|
||||
int error, opt, optval;
|
||||
u_int ui;
|
||||
struct inpcb *inp;
|
||||
struct tcpcb *tp;
|
||||
struct tcp_info ti;
|
||||
@ -1439,6 +1441,59 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
|
||||
INP_WUNLOCK(inp);
|
||||
break;
|
||||
|
||||
case TCP_KEEPIDLE:
|
||||
case TCP_KEEPINTVL:
|
||||
case TCP_KEEPCNT:
|
||||
case TCP_KEEPINIT:
|
||||
INP_WUNLOCK(inp);
|
||||
error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (ui > (UINT_MAX / hz)) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
ui *= hz;
|
||||
|
||||
INP_WLOCK_RECHECK(inp);
|
||||
switch (sopt->sopt_name) {
|
||||
case TCP_KEEPIDLE:
|
||||
tp->t_keepidle = ui;
|
||||
/*
|
||||
* XXX: better check current remaining
|
||||
* timeout and "merge" it with new value.
|
||||
*/
|
||||
if ((tp->t_state > TCPS_LISTEN) &&
|
||||
(tp->t_state <= TCPS_CLOSING))
|
||||
tcp_timer_activate(tp, TT_KEEP,
|
||||
TP_KEEPIDLE(tp));
|
||||
break;
|
||||
case TCP_KEEPINTVL:
|
||||
tp->t_keepintvl = ui;
|
||||
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
|
||||
(TP_MAXIDLE(tp) > 0))
|
||||
tcp_timer_activate(tp, TT_2MSL,
|
||||
TP_MAXIDLE(tp));
|
||||
break;
|
||||
case TCP_KEEPCNT:
|
||||
tp->t_keepcnt = ui;
|
||||
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
|
||||
(TP_MAXIDLE(tp) > 0))
|
||||
tcp_timer_activate(tp, TT_2MSL,
|
||||
TP_MAXIDLE(tp));
|
||||
break;
|
||||
case TCP_KEEPINIT:
|
||||
tp->t_keepinit = ui;
|
||||
if (tp->t_state == TCPS_SYN_RECEIVED ||
|
||||
tp->t_state == TCPS_SYN_SENT)
|
||||
tcp_timer_activate(tp, TT_KEEP,
|
||||
TP_KEEPINIT(tp));
|
||||
break;
|
||||
}
|
||||
INP_WUNLOCK(inp);
|
||||
break;
|
||||
|
||||
default:
|
||||
INP_WUNLOCK(inp);
|
||||
error = ENOPROTOOPT;
|
||||
@ -1636,7 +1691,7 @@ tcp_usrclosed(struct tcpcb *tp)
|
||||
int timeout;
|
||||
|
||||
timeout = (tcp_fast_finwait2_recycle) ?
|
||||
tcp_finwait2_timeout : tcp_maxidle;
|
||||
tcp_finwait2_timeout : TP_MAXIDLE(tp);
|
||||
tcp_timer_activate(tp, TT_2MSL, timeout);
|
||||
}
|
||||
}
|
||||
|
@ -203,7 +203,12 @@ struct tcpcb {
|
||||
struct cc_var *ccv; /* congestion control specific vars */
|
||||
struct osd *osd; /* storage for Khelp module data */
|
||||
|
||||
uint32_t t_ispare[12]; /* 4 keep timers, 5 UTO, 3 TBD */
|
||||
u_int t_keepinit; /* time to establish connection */
|
||||
u_int t_keepidle; /* time before keepalive probes begin */
|
||||
u_int t_keepintvl; /* interval between keepalives */
|
||||
u_int t_keepcnt; /* number of keepalives before close */
|
||||
|
||||
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
|
||||
void *t_pspare2[4]; /* 4 TBD */
|
||||
uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
|
||||
};
|
||||
|
@ -58,7 +58,7 @@
|
||||
* in the range 5 to 9.
|
||||
*/
|
||||
#undef __FreeBSD_version
|
||||
#define __FreeBSD_version 1000006 /* Master, propagated to newvers */
|
||||
#define __FreeBSD_version 1000007 /* Master, propagated to newvers */
|
||||
|
||||
/*
|
||||
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
|
||||
|
Loading…
Reference in New Issue
Block a user