2005-01-07 01:45:51 +00:00
|
|
|
/*-
|
1995-10-03 16:54:17 +00:00
|
|
|
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
|
1994-05-24 10:09:53 +00:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1995-10-03 16:54:17 +00:00
|
|
|
* @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
2007-10-07 20:44:24 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2012-11-27 06:42:44 +00:00
|
|
|
#include "opt_inet.h"
|
2000-01-09 19:17:30 +00:00
|
|
|
#include "opt_inet6.h"
|
1997-09-16 18:36:06 +00:00
|
|
|
#include "opt_tcpdebug.h"
|
2014-05-18 22:39:01 +00:00
|
|
|
#include "opt_rss.h"
|
1997-09-16 18:36:06 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
1995-11-09 20:23:09 +00:00
|
|
|
#include <sys/kernel.h>
|
2002-09-05 15:33:30 +00:00
|
|
|
#include <sys/lock.h>
|
2001-06-23 03:21:46 +00:00
|
|
|
#include <sys/mbuf.h>
|
2002-09-05 15:33:30 +00:00
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/protosw.h>
|
2010-03-20 19:47:30 +00:00
|
|
|
#include <sys/smp.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
2002-09-05 15:33:30 +00:00
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/systm.h>
|
1995-10-03 16:54:17 +00:00
|
|
|
|
2008-12-02 21:37:28 +00:00
|
|
|
#include <net/if.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <net/route.h>
|
2009-08-01 19:26:27 +00:00
|
|
|
#include <net/vnet.h>
|
2014-05-18 22:39:01 +00:00
|
|
|
#include <net/netisr.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2010-11-12 06:41:55 +00:00
|
|
|
#include <netinet/cc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
2014-05-18 22:39:01 +00:00
|
|
|
#include <netinet/in_rss.h>
|
2002-09-05 15:33:30 +00:00
|
|
|
#include <netinet/in_systm.h>
|
2000-01-09 19:17:30 +00:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
#include <netinet/tcp_fsm.h>
|
|
|
|
#include <netinet/tcp_timer.h>
|
|
|
|
#include <netinet/tcp_var.h>
|
|
|
|
#include <netinet/tcpip.h>
|
1996-07-12 17:28:47 +00:00
|
|
|
#ifdef TCPDEBUG
|
|
|
|
#include <netinet/tcp_debug.h>
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_keepinit;
|
1999-08-31 03:40:24 +00:00
|
|
|
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
|
2008-07-20 15:29:58 +00:00
|
|
|
&tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
|
1996-09-13 23:51:44 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_keepidle;
|
1999-08-31 03:40:24 +00:00
|
|
|
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
|
2008-07-20 15:29:58 +00:00
|
|
|
&tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
|
1995-11-09 20:23:09 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_keepintvl;
|
1999-08-31 03:40:24 +00:00
|
|
|
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
|
2008-07-20 15:29:58 +00:00
|
|
|
&tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
|
1995-11-09 20:23:09 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_delacktime;
|
2007-03-19 19:00:51 +00:00
|
|
|
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
|
|
|
|
&tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
|
1999-08-31 03:40:24 +00:00
|
|
|
"Time before a delayed ACK is sent");
|
2004-08-16 18:32:07 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_msl;
|
1999-08-31 03:40:24 +00:00
|
|
|
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
|
|
|
|
&tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
|
1999-08-30 21:17:07 +00:00
|
|
|
|
2002-07-18 19:06:12 +00:00
|
|
|
int tcp_rexmit_min;
|
|
|
|
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
|
2007-03-19 19:00:51 +00:00
|
|
|
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
|
|
|
|
"Minimum Retransmission Timeout");
|
2002-07-18 19:06:12 +00:00
|
|
|
|
|
|
|
int tcp_rexmit_slop;
|
|
|
|
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
|
2007-03-19 19:00:51 +00:00
|
|
|
&tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
|
|
|
|
"Retransmission Timer Slop");
|
2002-07-18 19:06:12 +00:00
|
|
|
|
2001-12-07 17:01:28 +00:00
|
|
|
static int always_keepalive = 1;
|
2004-08-16 18:32:07 +00:00
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
|
1999-05-03 23:57:32 +00:00
|
|
|
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
|
1996-04-04 11:17:04 +00:00
|
|
|
|
2007-02-26 22:25:21 +00:00
|
|
|
int tcp_fast_finwait2_recycle = 0;
|
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
|
2007-03-19 19:00:51 +00:00
|
|
|
&tcp_fast_finwait2_recycle, 0,
|
|
|
|
"Recycle closed FIN_WAIT_2 connections faster");
|
2007-02-26 22:25:21 +00:00
|
|
|
|
|
|
|
int tcp_finwait2_timeout;
|
|
|
|
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
|
2007-03-19 19:00:51 +00:00
|
|
|
&tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
|
2007-02-26 22:25:21 +00:00
|
|
|
|
2012-02-05 16:53:02 +00:00
|
|
|
int tcp_keepcnt = TCPTV_KEEPCNT;
|
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
|
|
|
|
"Number of keepalive probes to send");
|
2007-02-26 22:25:21 +00:00
|
|
|
|
1995-11-14 20:34:56 +00:00
|
|
|
/* max idle probes */
|
1999-08-30 21:17:07 +00:00
|
|
|
int tcp_maxpersistidle;
|
1995-10-03 16:54:17 +00:00
|
|
|
|
2013-01-09 20:27:06 +00:00
|
|
|
static int tcp_rexmit_drop_options = 0;
|
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
|
|
|
|
&tcp_rexmit_drop_options, 0,
|
|
|
|
"Drop TCP options from 3rd and later retransmitted SYN");
|
|
|
|
|
2014-06-30 04:26:29 +00:00
|
|
|
#ifdef RSS
|
|
|
|
static int per_cpu_timers = 1;
|
|
|
|
#else
|
2010-03-20 19:47:30 +00:00
|
|
|
static int per_cpu_timers = 0;
|
2014-06-30 04:26:29 +00:00
|
|
|
#endif
|
2010-03-20 19:47:30 +00:00
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
|
|
|
|
&per_cpu_timers , 0, "run tcp timers on all cpus");
|
|
|
|
|
2014-05-18 22:39:01 +00:00
|
|
|
#if 0
|
2010-03-20 19:47:30 +00:00
|
|
|
#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
|
|
|
|
((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
|
2014-05-18 22:39:01 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Map the given inp to a CPU id.
|
|
|
|
*
|
|
|
|
* This queries RSS if it's compiled in, else it defaults to the current
|
|
|
|
* CPU ID.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
inp_to_cpuid(struct inpcb *inp)
|
|
|
|
{
|
|
|
|
u_int cpuid;
|
|
|
|
|
|
|
|
#ifdef RSS
|
|
|
|
if (per_cpu_timers) {
|
|
|
|
cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
|
|
|
|
if (cpuid == NETISR_CPUID_NONE)
|
|
|
|
return (curcpu); /* XXX */
|
|
|
|
else
|
|
|
|
return (cpuid);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* Legacy, pre-RSS behaviour */
|
|
|
|
if (per_cpu_timers) {
|
|
|
|
/*
|
|
|
|
* We don't have a flowid -> cpuid mapping, so cheat and
|
|
|
|
* just map unknown cpuids to curcpu. Not the best, but
|
|
|
|
* apparently better than defaulting to swi 0.
|
|
|
|
*/
|
|
|
|
cpuid = inp->inp_flowid % (mp_maxid + 1);
|
|
|
|
if (! CPU_ABSENT(cpuid))
|
|
|
|
return (cpuid);
|
|
|
|
return (curcpu);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/* Default for RSS and non-RSS - cpuid 0 */
|
|
|
|
else {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
2010-03-20 19:47:30 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Tcp protocol timeout routine called every 500 ms.
|
1999-08-30 21:17:07 +00:00
|
|
|
* Updates timestamps used for TCP
|
1994-05-24 10:09:53 +00:00
|
|
|
* causes finite state machine actions if timers expire.
|
|
|
|
*/
|
|
|
|
void
|
2007-09-24 05:26:24 +00:00
|
|
|
tcp_slowtimo(void)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
VNET_ITERATOR_DECL(vnet_iter);
|
|
|
|
|
2009-07-19 14:20:53 +00:00
|
|
|
VNET_LIST_RLOCK_NOSLEEP();
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
VNET_FOREACH(vnet_iter) {
|
|
|
|
CURVNET_SET(vnet_iter);
|
2014-04-10 18:15:35 +00:00
|
|
|
tcp_tw_2msl_scan();
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
|
|
|
}
|
2009-07-19 14:20:53 +00:00
|
|
|
VNET_LIST_RUNLOCK_NOSLEEP();
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2001-02-26 21:33:55 +00:00
|
|
|
/*
 * Backoff multiplier tables, TCP_MAXRXTSHIFT + 1 slots each.
 * NOTE(review): presumably indexed by the retransmit shift count; the
 * users of these tables are outside this part of the file.
 */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = {
	1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64
};

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] = {
	1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512
};

/*
 * Sum of tcp_backoff[]: 1 + 2 + ... + 512 = 1023 for the first ten
 * entries, plus 3 * 512 for the remaining three.  Must be kept in step
 * with the table above by hand.
 */
static int	tcp_totbackoff = 2559;
|
1995-10-03 16:54:17 +00:00
|
|
|
|
Update TCP for infrastructural changes to the socket/pcb refcount model,
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, the receive
code no longer requires the pcbinfo lock, and the send code only
requires it if building a new connection on an otherwise unconnected
socket triggered via sendto() with an address. This should
significantly reduce tcbinfo lock contention in the receive and send
cases.
- In order to support the invariant that so_pcb != NULL, it is now
necessary for the TCP code to not discard the tcpcb any time a
connection is dropped, but instead leave the tcpcb until the socket
is shutdown. This case is handled by setting INP_DROPPED, to
substitute for using a NULL so_pcb to indicate that the connection
has been dropped. This requires the inpcb lock, but not the pcbinfo
lock.
- Unlike all other protocols in the tree, TCP may need to retain access
to the socket after the file descriptor has been closed. Set
SS_PROTOREF in tcp_detach() in order to prevent the socket from being
freed, and add a flag, INP_SOCKREF, so that the TCP code knows whether
or not it needs to free the socket when the connection finally does
close. The typical case where this occurs is if close() is called on
a TCP socket before all sent data in the send socket buffer has been
transmitted or acknowledged. If INP_SOCKREF is found when the
connection is dropped, we release the inpcb, tcpcb, and socket instead
of flagging INP_DROPPED.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Annotate the existence of a long-standing race in the TCP timer code,
in which timers are stopped but not drained when the socket is freed,
as waiting for drain may lead to deadlocks, or have to occur in a
context where waiting is not permitted. This race has been handled
by testing to see if the tcpcb pointer in the inpcb is NULL (and vice
versa), which is not normally permitted, but may be true if an inpcb
and tcpcb have been freed. Add a counter to test how often this race
has actually occurred, and a large comment for each instance where
we compare potentially freed memory with NULL. This will have to be
fixed in the near future, but requires us to further address how to
handle the timer shutdown issue.
- Several TCP calls no longer potentially free the passed inpcb/tcpcb,
so no longer need to return a pointer to indicate whether the argument
passed in is still valid.
- Un-macroize debugging and locking setup for various protocol switch
methods for TCP, as it led to more obscurity, and as locking becomes
more customized to the methods, offers less benefit.
- Assert copyright on tcp_usrreq.c due to significant modifications that
have been made as part of this work.
These changes significantly modify the memory management and connection
logic of our TCP implementation, and are (as such) High Risk Changes,
and likely to contain serious bugs. Please report problems to the
current@ mailing list ASAP, ideally with simple test cases, and
optionally, packet traces.
MFC after: 3 months
2006-04-01 16:36:36 +00:00
|
|
|
static int tcp_timer_race;
|
|
|
|
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
|
|
|
|
0, "Count of t_inpcb races on tcp_discardcb");
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
/*
|
|
|
|
* TCP timer processing.
|
|
|
|
*/
|
2007-04-11 09:45:16 +00:00
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
void
|
|
|
|
tcp_timer_delack(void *xtp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2007-09-07 09:19:22 +00:00
|
|
|
struct tcpcb *tp = xtp;
|
|
|
|
struct inpcb *inp;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_SET(tp->t_vnet);
|
1999-08-30 21:17:07 +00:00
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
inp = tp->t_inpcb;
|
Update TCP for infrastructural changes to the socket/pcb refcount model,
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, the receive
code no longer requires the pcbinfo lock, and the send code only
requires it if building a new connection on an otherwise unconnected
socket triggered via sendto() with an address. This should
significnatly reduce tcbinfo lock contention in the receive and send
cases.
- In order to support the invariant that so_pcb != NULL, it is now
necessary for the TCP code to not discard the tcpcb any time a
connection is dropped, but instead leave the tcpcb until the socket
is shutdown. This case is handled by setting INP_DROPPED, to
substitute for using a NULL so_pcb to indicate that the connection
has been dropped. This requires the inpcb lock, but not the pcbinfo
lock.
- Unlike all other protocols in the tree, TCP may need to retain access
to the socket after the file descriptor has been closed. Set
SS_PROTOREF in tcp_detach() in order to prevent the socket from being
freed, and add a flag, INP_SOCKREF, so that the TCP code knows whether
or not it needs to free the socket when the connection finally does
close. The typical case where this occurs is if close() is called on
a TCP socket before all sent data in the send socket buffer has been
transmitted or acknowledged. If INP_SOCKREF is found when the
connection is dropped, we release the inpcb, tcpcb, and socket instead
of flagging INP_DROPPED.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Annotate the existence of a long-standing race in the TCP timer code,
in which timers are stopped but not drained when the socket is freed,
as waiting for drain may lead to deadlocks, or have to occur in a
context where waiting is not permitted. This race has been handled
by testing to see if the tcpcb pointer in the inpcb is NULL (and vice
versa), which is not normally permitted, but may be true of a inpcb
and tcpcb have been freed. Add a counter to test how often this race
has actually occurred, and a large comment for each instance where
we compare potentially freed memory with NULL. This will have to be
fixed in the near future, but requires is to further address how to
handle the timer shutdown shutdown issue.
- Several TCP calls no longer potentially free the passed inpcb/tcpcb,
so no longer need to return a pointer to indicate whether the argument
passed in is still valid.
- Un-macroize debugging and locking setup for various protocol switch
methods for TCP, as it lead to more obscurity, and as locking becomes
more customized to the methods, offers less benefit.
- Assert copyright on tcp_usrreq.c due to significant modifications that
have been made as part of this work.
These changes significantly modify the memory management and connection
logic of our TCP implementation, and are (as such) High Risk Changes,
and likely to contain serious bugs. Please report problems to the
current@ mailing list ASAP, ideally with simple test cases, and
optionally, packet traces.
MFC after: 3 months
2006-04-01 16:36:36 +00:00
|
|
|
/*
|
2007-09-07 09:19:22 +00:00
|
|
|
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
|
|
|
* tear-down mean we need it as a work-around for races between
|
|
|
|
* timers and tcp_discardcb().
|
|
|
|
*
|
|
|
|
* KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
|
2007-04-11 09:45:16 +00:00
|
|
|
*/
|
2007-09-07 09:19:22 +00:00
|
|
|
if (inp == NULL) {
|
|
|
|
tcp_timer_race++;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2002-12-24 21:00:31 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WLOCK(inp);
|
2012-08-05 17:30:17 +00:00
|
|
|
if (callout_pending(&tp->t_timers->tt_delack) ||
|
|
|
|
!callout_active(&tp->t_timers->tt_delack)) {
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
1999-08-30 21:17:07 +00:00
|
|
|
return;
|
|
|
|
}
|
2007-09-24 05:26:24 +00:00
|
|
|
callout_deactivate(&tp->t_timers->tt_delack);
|
2012-08-05 17:30:17 +00:00
|
|
|
if ((inp->inp_flags & INP_DROPPED) != 0) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
1999-08-30 21:17:07 +00:00
|
|
|
|
|
|
|
tp->t_flags |= TF_ACKNOW;
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_delack);
|
1999-08-30 21:17:07 +00:00
|
|
|
(void) tcp_output(tp);
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
void
|
|
|
|
tcp_timer_2msl(void *xtp)
|
1999-08-30 21:17:07 +00:00
|
|
|
{
|
2007-09-07 09:19:22 +00:00
|
|
|
struct tcpcb *tp = xtp;
|
|
|
|
struct inpcb *inp;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_SET(tp->t_vnet);
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
|
|
|
int ostate;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
ostate = tp->t_state;
|
|
|
|
#endif
|
2007-09-07 09:19:22 +00:00
|
|
|
/*
|
|
|
|
* XXXRW: Does this actually happen?
|
|
|
|
*/
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WLOCK(&V_tcbinfo);
|
2007-09-07 09:19:22 +00:00
|
|
|
inp = tp->t_inpcb;
|
|
|
|
/*
|
|
|
|
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
|
|
|
* tear-down mean we need it as a work-around for races between
|
|
|
|
* timers and tcp_discardcb().
|
|
|
|
*
|
|
|
|
* KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
|
|
|
|
*/
|
|
|
|
if (inp == NULL) {
|
|
|
|
tcp_timer_race++;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WLOCK(inp);
|
2007-09-07 09:19:22 +00:00
|
|
|
tcp_free_sackholes(tp);
|
2012-08-05 17:30:17 +00:00
|
|
|
if (callout_pending(&tp->t_timers->tt_2msl) ||
|
2007-09-24 05:26:24 +00:00
|
|
|
!callout_active(&tp->t_timers->tt_2msl)) {
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(tp->t_inpcb);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2007-09-24 05:26:24 +00:00
|
|
|
callout_deactivate(&tp->t_timers->tt_2msl);
|
2012-08-05 17:30:17 +00:00
|
|
|
if ((inp->inp_flags & INP_DROPPED) != 0) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* 2 MSL timeout in shutdown went off. If we're closed but
|
|
|
|
* still waiting for peer to close and connection has been idle
|
|
|
|
* too long, or if 2MSL time is up from TIME_WAIT, delete connection
|
|
|
|
* control block. Otherwise, check again in a bit.
|
2007-02-26 22:25:21 +00:00
|
|
|
*
|
|
|
|
* If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
|
|
|
|
* there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
|
|
|
|
* Ignore fact that there were recent incoming segments.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2007-02-26 22:25:21 +00:00
|
|
|
if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
|
2007-09-07 09:19:22 +00:00
|
|
|
tp->t_inpcb && tp->t_inpcb->inp_socket &&
|
2007-02-26 22:25:21 +00:00
|
|
|
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_finwait2_drops);
|
2007-09-07 09:19:22 +00:00
|
|
|
tp = tcp_close(tp);
|
2007-02-26 22:25:21 +00:00
|
|
|
} else {
|
|
|
|
if (tp->t_state != TCPS_TIME_WAIT &&
|
2012-02-05 16:53:02 +00:00
|
|
|
ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
|
|
|
|
callout_reset_on(&tp->t_timers->tt_2msl,
|
2014-05-18 22:39:01 +00:00
|
|
|
TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
|
|
|
|
inp_to_cpuid(inp));
|
2007-09-07 09:19:22 +00:00
|
|
|
else
|
|
|
|
tp = tcp_close(tp);
|
|
|
|
}
|
1999-08-30 21:17:07 +00:00
|
|
|
|
|
|
|
#ifdef TCPDEBUG
|
2007-09-24 14:46:27 +00:00
|
|
|
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
|
2002-05-31 11:52:35 +00:00
|
|
|
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
|
|
|
|
PRU_SLOWTIMO);
|
1999-08-30 21:17:07 +00:00
|
|
|
#endif
|
2007-09-07 09:19:22 +00:00
|
|
|
if (tp != NULL)
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
void
|
|
|
|
tcp_timer_keep(void *xtp)
|
1999-08-30 21:17:07 +00:00
|
|
|
{
|
2007-09-07 09:19:22 +00:00
|
|
|
struct tcpcb *tp = xtp;
|
2001-06-23 03:21:46 +00:00
|
|
|
struct tcptemp *t_template;
|
2007-09-07 09:19:22 +00:00
|
|
|
struct inpcb *inp;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_SET(tp->t_vnet);
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
|
|
|
int ostate;
|
|
|
|
|
|
|
|
ostate = tp->t_state;
|
|
|
|
#endif
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WLOCK(&V_tcbinfo);
|
2007-09-07 09:19:22 +00:00
|
|
|
inp = tp->t_inpcb;
|
|
|
|
/*
|
|
|
|
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
|
|
|
* tear-down mean we need it as a work-around for races between
|
|
|
|
* timers and tcp_discardcb().
|
|
|
|
*
|
|
|
|
* KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
|
|
|
|
*/
|
|
|
|
if (inp == NULL) {
|
|
|
|
tcp_timer_race++;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WLOCK(inp);
|
2012-08-05 17:30:17 +00:00
|
|
|
if (callout_pending(&tp->t_timers->tt_keep) ||
|
|
|
|
!callout_active(&tp->t_timers->tt_keep)) {
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2007-09-24 05:26:24 +00:00
|
|
|
callout_deactivate(&tp->t_timers->tt_keep);
|
2012-08-05 17:30:17 +00:00
|
|
|
if ((inp->inp_flags & INP_DROPPED) != 0) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
1999-08-30 21:17:07 +00:00
|
|
|
* Keep-alive timer went off; send something
|
|
|
|
* or drop connection if idle for too long.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_keeptimeo);
|
1999-08-30 21:17:07 +00:00
|
|
|
if (tp->t_state < TCPS_ESTABLISHED)
|
|
|
|
goto dropit;
|
2003-11-08 22:57:13 +00:00
|
|
|
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
|
1999-08-30 21:17:07 +00:00
|
|
|
tp->t_state <= TCPS_CLOSING) {
|
2012-02-05 16:53:02 +00:00
|
|
|
if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
|
1999-08-30 21:17:07 +00:00
|
|
|
goto dropit;
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
1999-08-30 21:17:07 +00:00
|
|
|
* Send a packet designed to force a response
|
|
|
|
* if the peer is up and reachable:
|
|
|
|
* either an ACK if the connection is still alive,
|
|
|
|
* or an RST if the peer has closed the connection
|
|
|
|
* due to timeout or reboot.
|
|
|
|
* Using sequence number tp->snd_una-1
|
|
|
|
* causes the transmitted zero-length segment
|
|
|
|
* to lie outside the receive window;
|
|
|
|
* by the protocol spec, this requires the
|
|
|
|
* correspondent TCP to respond.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_keepprobe);
|
2003-02-19 22:18:06 +00:00
|
|
|
t_template = tcpip_maketemplate(inp);
|
2001-06-23 03:21:46 +00:00
|
|
|
if (t_template) {
|
|
|
|
tcp_respond(tp, t_template->tt_ipgen,
|
|
|
|
&t_template->tt_t, (struct mbuf *)NULL,
|
|
|
|
tp->rcv_nxt, tp->snd_una - 1, 0);
|
2008-06-02 14:20:26 +00:00
|
|
|
free(t_template, M_TEMP);
|
2001-06-23 03:21:46 +00:00
|
|
|
}
|
2012-02-05 16:53:02 +00:00
|
|
|
callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
|
2014-05-18 22:39:01 +00:00
|
|
|
tcp_timer_keep, tp, inp_to_cpuid(inp));
|
2002-05-31 11:52:35 +00:00
|
|
|
} else
|
2012-02-05 16:53:02 +00:00
|
|
|
callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
|
2014-05-18 22:39:01 +00:00
|
|
|
tcp_timer_keep, tp, inp_to_cpuid(inp));
|
1999-08-30 21:17:07 +00:00
|
|
|
|
|
|
|
#ifdef TCPDEBUG
|
2003-11-08 22:57:13 +00:00
|
|
|
if (inp->inp_socket->so_options & SO_DEBUG)
|
2000-01-09 19:17:30 +00:00
|
|
|
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
|
1999-08-30 21:17:07 +00:00
|
|
|
PRU_SLOWTIMO);
|
|
|
|
#endif
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
1999-08-30 21:17:07 +00:00
|
|
|
|
|
|
|
dropit:
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_keepdrops);
|
2007-09-07 09:19:22 +00:00
|
|
|
tp = tcp_drop(tp, ETIMEDOUT);
|
|
|
|
|
|
|
|
#ifdef TCPDEBUG
|
|
|
|
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
|
|
|
|
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
|
|
|
|
PRU_SLOWTIMO);
|
|
|
|
#endif
|
|
|
|
if (tp != NULL)
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(tp->t_inpcb);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
void
|
|
|
|
tcp_timer_persist(void *xtp)
|
1999-08-30 21:17:07 +00:00
|
|
|
{
|
2007-09-07 09:19:22 +00:00
|
|
|
struct tcpcb *tp = xtp;
|
|
|
|
struct inpcb *inp;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_SET(tp->t_vnet);
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
|
|
|
int ostate;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
ostate = tp->t_state;
|
|
|
|
#endif
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WLOCK(&V_tcbinfo);
|
2007-09-07 09:19:22 +00:00
|
|
|
inp = tp->t_inpcb;
|
|
|
|
/*
|
|
|
|
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
|
|
|
* tear-down mean we need it as a work-around for races between
|
|
|
|
* timers and tcp_discardcb().
|
|
|
|
*
|
|
|
|
* KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
|
|
|
|
*/
|
|
|
|
if (inp == NULL) {
|
|
|
|
tcp_timer_race++;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WLOCK(inp);
|
2012-08-05 17:30:17 +00:00
|
|
|
if (callout_pending(&tp->t_timers->tt_persist) ||
|
|
|
|
!callout_active(&tp->t_timers->tt_persist)) {
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2007-09-24 05:26:24 +00:00
|
|
|
callout_deactivate(&tp->t_timers->tt_persist);
|
2012-08-05 17:30:17 +00:00
|
|
|
if ((inp->inp_flags & INP_DROPPED) != 0) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Persistence timer into zero window.
|
|
|
|
* Force a byte to be output, if possible.
|
|
|
|
*/
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_persisttimeo);
|
1999-08-30 21:17:07 +00:00
|
|
|
/*
|
|
|
|
* Hack: if the peer is dead/unreachable, we do not
|
|
|
|
* time out if the window is closed. After a full
|
|
|
|
* backoff, drop the connection if the idle time
|
|
|
|
* (no responses to probes) reaches the maximum
|
|
|
|
* backoff that we would use if retransmitting.
|
|
|
|
*/
|
|
|
|
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
|
2009-06-16 19:00:48 +00:00
|
|
|
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
|
|
|
|
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_persistdrop);
|
2007-09-07 09:19:22 +00:00
|
|
|
tp = tcp_drop(tp, ETIMEDOUT);
|
|
|
|
goto out;
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
2012-10-28 19:58:20 +00:00
|
|
|
/*
|
|
|
|
* If the user has closed the socket then drop a persisting
|
|
|
|
* connection after a much reduced timeout.
|
|
|
|
*/
|
|
|
|
if (tp->t_state > TCPS_CLOSE_WAIT &&
|
|
|
|
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
|
|
|
|
TCPSTAT_INC(tcps_persistdrop);
|
|
|
|
tp = tcp_drop(tp, ETIMEDOUT);
|
|
|
|
goto out;
|
|
|
|
}
|
1999-08-30 21:17:07 +00:00
|
|
|
tcp_setpersist(tp);
|
2005-05-21 00:38:29 +00:00
|
|
|
tp->t_flags |= TF_FORCEDATA;
|
1999-08-30 21:17:07 +00:00
|
|
|
(void) tcp_output(tp);
|
2005-05-21 00:38:29 +00:00
|
|
|
tp->t_flags &= ~TF_FORCEDATA;
|
1999-08-30 21:17:07 +00:00
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
out:
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
2006-05-16 10:51:26 +00:00
|
|
|
if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
|
|
|
|
tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
|
1999-08-30 21:17:07 +00:00
|
|
|
#endif
|
2007-09-07 09:19:22 +00:00
|
|
|
if (tp != NULL)
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparison between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
void
|
|
|
|
tcp_timer_rexmt(void * xtp)
|
1999-08-30 21:17:07 +00:00
|
|
|
{
|
2007-09-07 09:19:22 +00:00
|
|
|
struct tcpcb *tp = xtp;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_SET(tp->t_vnet);
|
1999-08-30 21:17:07 +00:00
|
|
|
int rexmt;
|
2007-09-07 09:19:22 +00:00
|
|
|
int headlocked;
|
|
|
|
struct inpcb *inp;
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
|
|
|
int ostate;
|
|
|
|
|
|
|
|
ostate = tp->t_state;
|
|
|
|
#endif
|
2010-03-20 19:47:30 +00:00
|
|
|
INP_INFO_RLOCK(&V_tcbinfo);
|
2007-09-07 09:19:22 +00:00
|
|
|
inp = tp->t_inpcb;
|
|
|
|
/*
|
|
|
|
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
|
|
|
* tear-down mean we need it as a work-around for races between
|
|
|
|
* timers and tcp_discardcb().
|
|
|
|
*
|
|
|
|
* KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
|
|
|
|
*/
|
|
|
|
if (inp == NULL) {
|
|
|
|
tcp_timer_race++;
|
2010-03-20 19:47:30 +00:00
|
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WLOCK(inp);
|
2012-08-05 17:30:17 +00:00
|
|
|
if (callout_pending(&tp->t_timers->tt_rexmt) ||
|
|
|
|
!callout_active(&tp->t_timers->tt_rexmt)) {
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
2010-03-20 19:47:30 +00:00
|
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
return;
|
|
|
|
}
|
2007-09-24 05:26:24 +00:00
|
|
|
callout_deactivate(&tp->t_timers->tt_rexmt);
|
2012-08-05 17:30:17 +00:00
|
|
|
if ((inp->inp_flags & INP_DROPPED) != 0) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
2004-06-23 21:04:37 +00:00
|
|
|
tcp_free_sackholes(tp);
|
1999-08-30 21:17:07 +00:00
|
|
|
/*
|
|
|
|
* Retransmission timer went off. Message has not
|
|
|
|
* been acked within retransmit interval. Back off
|
|
|
|
* to a longer retransmit interval and retransmit one segment.
|
|
|
|
*/
|
|
|
|
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
|
|
|
|
tp->t_rxtshift = TCP_MAXRXTSHIFT;
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_timeoutdrop);
|
2010-03-20 19:47:30 +00:00
|
|
|
in_pcbref(inp);
|
2011-01-07 21:40:34 +00:00
|
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_WLOCK(&V_tcbinfo);
|
|
|
|
INP_WLOCK(inp);
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
if (in_pcbrele_wlocked(inp)) {
|
2011-01-07 21:40:34 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
2011-10-12 19:52:23 +00:00
|
|
|
if (inp->inp_flags & INP_DROPPED) {
|
|
|
|
INP_WUNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
tp = tcp_drop(tp, tp->t_softerror ?
|
|
|
|
tp->t_softerror : ETIMEDOUT);
|
2010-03-20 19:47:30 +00:00
|
|
|
headlocked = 1;
|
2007-09-07 09:19:22 +00:00
|
|
|
goto out;
|
1999-08-30 21:17:07 +00:00
|
|
|
}
|
2010-03-20 19:47:30 +00:00
|
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
2007-09-07 09:19:22 +00:00
|
|
|
headlocked = 0;
|
2012-10-28 17:25:08 +00:00
|
|
|
if (tp->t_state == TCPS_SYN_SENT) {
|
|
|
|
/*
|
|
|
|
* If the SYN was retransmitted, indicate CWND to be
|
|
|
|
* limited to 1 segment in cc_conn_init().
|
|
|
|
*/
|
|
|
|
tp->snd_cwnd = 1;
|
|
|
|
} else if (tp->t_rxtshift == 1) {
|
1995-07-29 18:48:44 +00:00
|
|
|
/*
|
1999-08-30 21:17:07 +00:00
|
|
|
* first retransmit; record ssthresh and cwnd so they can
|
2004-08-16 18:32:07 +00:00
|
|
|
* be recovered if this turns out to be a "bad" retransmit.
|
|
|
|
* A retransmit is considered "bad" if an ACK for this
|
1999-08-30 21:17:07 +00:00
|
|
|
* segment is received within RTT/2 interval; the assumption
|
2004-08-16 18:32:07 +00:00
|
|
|
* here is that the ACK was already in flight. See
|
1999-08-30 21:17:07 +00:00
|
|
|
* "On Estimating End-to-End Network Path Properties" by
|
|
|
|
* Allman and Paxson for more details.
|
1995-07-29 18:48:44 +00:00
|
|
|
*/
|
1999-08-30 21:17:07 +00:00
|
|
|
tp->snd_cwnd_prev = tp->snd_cwnd;
|
|
|
|
tp->snd_ssthresh_prev = tp->snd_ssthresh;
|
2003-07-15 21:49:53 +00:00
|
|
|
tp->snd_recover_prev = tp->snd_recover;
|
2010-11-12 06:41:55 +00:00
|
|
|
if (IN_FASTRECOVERY(tp->t_flags))
|
|
|
|
tp->t_flags |= TF_WASFRECOVERY;
|
2003-07-15 21:49:53 +00:00
|
|
|
else
|
2010-11-12 06:41:55 +00:00
|
|
|
tp->t_flags &= ~TF_WASFRECOVERY;
|
|
|
|
if (IN_CONGRECOVERY(tp->t_flags))
|
|
|
|
tp->t_flags |= TF_WASCRECOVERY;
|
|
|
|
else
|
|
|
|
tp->t_flags &= ~TF_WASCRECOVERY;
|
1999-08-30 21:17:07 +00:00
|
|
|
tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
|
2011-04-29 15:40:12 +00:00
|
|
|
tp->t_flags |= TF_PREVVALID;
|
|
|
|
} else
|
|
|
|
tp->t_flags &= ~TF_PREVVALID;
|
2009-04-11 22:07:19 +00:00
|
|
|
TCPSTAT_INC(tcps_rexmttimeo);
|
2001-02-26 21:33:55 +00:00
|
|
|
if (tp->t_state == TCPS_SYN_SENT)
|
2012-10-28 18:56:57 +00:00
|
|
|
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
|
2001-02-26 21:33:55 +00:00
|
|
|
else
|
|
|
|
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
|
1999-08-30 21:17:07 +00:00
|
|
|
TCPT_RANGESET(tp->t_rxtcur, rexmt,
|
|
|
|
tp->t_rttmin, TCPTV_REXMTMAX);
|
2001-05-31 19:24:49 +00:00
|
|
|
/*
|
2012-10-28 19:22:18 +00:00
|
|
|
* Disable RFC1323 and SACK if we haven't got any response to
|
2004-08-16 18:32:07 +00:00
|
|
|
* our third SYN to work-around some broken terminal servers
|
|
|
|
* (most of which have hopefully been retired) that have bad VJ
|
|
|
|
* header compression code which trashes TCP segments containing
|
2001-05-31 19:24:49 +00:00
|
|
|
* unknown-to-them TCP options.
|
|
|
|
*/
|
2013-01-09 20:27:06 +00:00
|
|
|
if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
|
|
|
|
(tp->t_rxtshift == 3))
|
2012-10-28 19:20:23 +00:00
|
|
|
tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2003-11-20 20:07:39 +00:00
|
|
|
* If we backed off this far, our srtt estimate is probably bogus.
|
|
|
|
* Clobber it so we'll take the next rtt measurement as our srtt;
|
1999-08-30 21:17:07 +00:00
|
|
|
* move the current srtt into rttvar to keep the current
|
|
|
|
* retransmit times until then.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
1999-08-30 21:17:07 +00:00
|
|
|
if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
|
2000-01-09 19:17:30 +00:00
|
|
|
#ifdef INET6
|
|
|
|
if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
|
|
|
|
in6_losing(tp->t_inpcb);
|
|
|
|
#endif
|
1999-08-30 21:17:07 +00:00
|
|
|
tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
|
|
|
|
tp->t_srtt = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
1999-08-30 21:17:07 +00:00
|
|
|
tp->snd_nxt = tp->snd_una;
|
2003-07-15 21:49:53 +00:00
|
|
|
tp->snd_recover = tp->snd_max;
|
1999-08-30 21:17:07 +00:00
|
|
|
/*
|
|
|
|
* Force a segment to be sent.
|
|
|
|
*/
|
|
|
|
tp->t_flags |= TF_ACKNOW;
|
|
|
|
/*
|
|
|
|
* If timing a segment in this window, stop the timer.
|
|
|
|
*/
|
|
|
|
tp->t_rtttime = 0;
|
2010-11-12 06:41:55 +00:00
|
|
|
|
2010-12-02 00:47:55 +00:00
|
|
|
cc_cong_signal(tp, NULL, CC_RTO);
|
2010-11-12 06:41:55 +00:00
|
|
|
|
1999-08-30 21:17:07 +00:00
|
|
|
(void) tcp_output(tp);
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
out:
|
1999-08-30 21:17:07 +00:00
|
|
|
#ifdef TCPDEBUG
|
2006-03-26 01:33:41 +00:00
|
|
|
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
|
2002-05-31 11:52:35 +00:00
|
|
|
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
|
|
|
|
PRU_SLOWTIMO);
|
1999-08-30 21:17:07 +00:00
|
|
|
#endif
|
2007-09-07 09:19:22 +00:00
|
|
|
if (tp != NULL)
|
2008-04-17 21:38:18 +00:00
|
|
|
INP_WUNLOCK(inp);
|
2007-09-07 09:19:22 +00:00
|
|
|
if (headlocked)
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
CURVNET_RESTORE();
|
2007-09-07 09:19:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
|
|
|
|
{
|
|
|
|
struct callout *t_callout;
|
|
|
|
void *f_callout;
|
2010-03-20 19:47:30 +00:00
|
|
|
struct inpcb *inp = tp->t_inpcb;
|
2014-05-18 22:39:01 +00:00
|
|
|
int cpu = inp_to_cpuid(inp);
|
2007-09-07 09:19:22 +00:00
|
|
|
|
2012-06-19 07:34:13 +00:00
|
|
|
#ifdef TCP_OFFLOAD
|
|
|
|
if (tp->t_flags & TF_TOE)
|
|
|
|
return;
|
|
|
|
#endif
|
|
|
|
|
2007-09-07 09:19:22 +00:00
|
|
|
switch (timer_type) {
|
|
|
|
case TT_DELACK:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_delack;
|
2007-09-07 09:19:22 +00:00
|
|
|
f_callout = tcp_timer_delack;
|
|
|
|
break;
|
|
|
|
case TT_REXMT:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_rexmt;
|
2007-09-07 09:19:22 +00:00
|
|
|
f_callout = tcp_timer_rexmt;
|
|
|
|
break;
|
|
|
|
case TT_PERSIST:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_persist;
|
2007-09-07 09:19:22 +00:00
|
|
|
f_callout = tcp_timer_persist;
|
|
|
|
break;
|
|
|
|
case TT_KEEP:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_keep;
|
2007-09-07 09:19:22 +00:00
|
|
|
f_callout = tcp_timer_keep;
|
|
|
|
break;
|
|
|
|
case TT_2MSL:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_2msl;
|
2007-09-07 09:19:22 +00:00
|
|
|
f_callout = tcp_timer_2msl;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
panic("bad timer_type");
|
|
|
|
}
|
|
|
|
if (delta == 0) {
|
|
|
|
callout_stop(t_callout);
|
|
|
|
} else {
|
2010-03-20 19:47:30 +00:00
|
|
|
callout_reset_on(t_callout, delta, f_callout, tp, cpu);
|
2007-09-07 09:19:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
tcp_timer_active(struct tcpcb *tp, int timer_type)
|
|
|
|
{
|
|
|
|
struct callout *t_callout;
|
|
|
|
|
|
|
|
switch (timer_type) {
|
|
|
|
case TT_DELACK:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_delack;
|
2007-09-07 09:19:22 +00:00
|
|
|
break;
|
|
|
|
case TT_REXMT:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_rexmt;
|
2007-09-07 09:19:22 +00:00
|
|
|
break;
|
|
|
|
case TT_PERSIST:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_persist;
|
2007-09-07 09:19:22 +00:00
|
|
|
break;
|
|
|
|
case TT_KEEP:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_keep;
|
2007-09-07 09:19:22 +00:00
|
|
|
break;
|
|
|
|
case TT_2MSL:
|
2007-09-24 05:26:24 +00:00
|
|
|
t_callout = &tp->t_timers->tt_2msl;
|
2007-09-07 09:19:22 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
panic("bad timer_type");
|
|
|
|
}
|
|
|
|
return callout_active(t_callout);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2009-09-16 05:33:15 +00:00
|
|
|
|
|
|
|
/*
 * Scale a tick count t to milliseconds: 1000 * t / hz.
 * NOTE(review): hz is presumably the kernel's ticks-per-second rate --
 * confirm.  The multiplication by 1000 happens before the division, so a
 * large t could overflow; verify the range callers pass in.
 */
#define ticks_to_msecs(t) (1000*(t) / hz)
|
|
|
|
|
|
|
|
void
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather information about the callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields have been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
|
|
|
|
struct xtcp_timer *xtimer)
|
2009-09-16 05:33:15 +00:00
|
|
|
{
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
sbintime_t now;
|
|
|
|
|
|
|
|
bzero(xtimer, sizeof(*xtimer));
|
2009-09-16 05:33:15 +00:00
|
|
|
if (timer == NULL)
|
|
|
|
return;
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
now = getsbinuptime();
|
2009-09-16 05:33:15 +00:00
|
|
|
if (callout_active(&timer->tt_delack))
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
|
2009-09-16 05:33:15 +00:00
|
|
|
if (callout_active(&timer->tt_rexmt))
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
|
2009-09-16 05:33:15 +00:00
|
|
|
if (callout_active(&timer->tt_persist))
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
|
2009-09-16 05:33:15 +00:00
|
|
|
if (callout_active(&timer->tt_keep))
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
|
2009-09-16 05:33:15 +00:00
|
|
|
if (callout_active(&timer->tt_2msl))
|
- Make callout(9) tickless, relying on eventtimers(4) as backend for
precise time event generation. This greatly improves granularity of
callouts which are not anymore constrained to wait next tick to be
scheduled.
- Extend the callout KPI introducing a set of callout_reset_sbt* functions,
which take a sbintime_t as timeout argument. The new KPI also offers a
way for consumers to specify precision tolerance they allow, so that
callout can coalesce events and reduce number of interrupts as well as
potentially avoid scheduling a SWI thread.
- Introduce support for dispatching callouts directly from hardware
interrupt context, specifying an additional flag. This feature should be
used carefully, as long as interrupt context has some limitations
(e.g. no sleeping locks can be held).
- Enhance mechanisms to gather informations about callwheel, introducing
a new sysctl to obtain stats.
This change breaks the KBI. struct callout fields has been changed, in
particular 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
(8 bytes) and another 'sbintime_t' field was added for precision.
Together with: mav
Reviewed by: attilio, bde, luigi, phk
Sponsored by: Google Summer of Code 2012, iXsystems inc.
Tested by: flo (amd64, sparc64), marius (sparc64), ian (arm),
markj (amd64), mav, Fabian Keil
2013-03-04 11:09:56 +00:00
|
|
|
xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
|
2009-09-16 05:33:15 +00:00
|
|
|
xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
|
|
|
|
}
|