o Back out rev. 1.125 of in_pcb.c. It appeared to behave extremely
  badly under high load. For example, with 40k sockets and 25k tcptw
  entries, the connect() syscall can run for seconds. Debugging showed
  that it iterates the cycle millions of times and purges thousands of
  tcptw entries at a time.
  Besides being practically unusable, this change is architecturally
  wrong. First, in_pcblookup_local() is used in the connect() and bind()
  syscalls. No purging of stale entries should be done here. Second,
  it is a layering violation.
o Return the tcptw purging cycle to tcp_timer_2msl_tw(); it
  was removed in rev. 1.78 by rwatson. The commit log of that
  revision says nothing about the reason the cycle was removed. Now
  we need this cycle, since the major cleaner of stale tcptw structures
  has been removed.
o Disable the probably necessary, but now unused,
  tcp_twrecycleable() function.

Reviewed by:	ru
This commit is contained in:
Gleb Smirnoff 2006-09-06 13:56:35 +00:00
parent d398d50285
commit 2c857a9be9
5 changed files with 20 additions and 31 deletions

View File

@ -903,7 +903,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
u_int lport_arg, int wild_okay)
{
struct inpcb *inp;
struct tcptw *tw;
#ifdef INET6
int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
#else
@ -949,7 +948,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
* First see if this local port is in use by looking on the
* port hash list.
*/
retrylookup:
porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->porthashmask)];
LIST_FOREACH(phd, porthash, phd_hash) {
@ -982,20 +980,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
if ((inp->inp_vflag & INP_IPV6) != 0)
wildcard += INP_LOOKUP_MAPPED_PCB_COST;
#endif
/*
* Clean out old time_wait sockets if they
* are clogging up needed local ports.
*/
if ((inp->inp_vflag & INP_TIMEWAIT) != 0) {
tw = intotw(inp);
if (tw != NULL &&
tcp_twrecycleable(tw)) {
INP_LOCK(inp);
tcp_twclose(tw, 0);
match = NULL;
goto retrylookup;
}
}
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
if (inp->inp_laddr.s_addr != INADDR_ANY) {

View File

@ -1808,6 +1808,7 @@ tcp_twstart(struct tcpcb *tp)
INP_UNLOCK(inp);
}
#if 0
/*
* The approximate rate of ISN increase of Microsoft TCP stacks;
* the actual rate is slightly higher due to the addition of
@ -1822,10 +1823,6 @@ tcp_twstart(struct tcpcb *tp)
* Determine if the ISN we will generate has advanced beyond the last
* sequence number used by the previous connection. If so, indicate
* that it is safe to recycle this tw socket by returning 1.
*
* XXXRW: This function should assert the inpcb lock as it does multiple
* non-atomic reads from the tcptw, but is currently called without it from
* in_pcb.c:in_pcblookup_local().
*/
int
tcp_twrecycleable(struct tcptw *tw)
@ -1833,6 +1830,7 @@ tcp_twrecycleable(struct tcptw *tw)
tcp_seq new_iss = tw->iss;
tcp_seq new_irs = tw->irs;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
@ -1841,6 +1839,7 @@ tcp_twrecycleable(struct tcptw *tw)
else
return (0);
}
#endif
void
tcp_twclose(struct tcptw *tw, int reuse)

View File

@ -288,15 +288,20 @@ tcp_timer_2msl_tw(int reuse)
int i;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
for (i = 0; i < 2; i++) {
for (i = 0; i < TWLIST_NLISTS; i++) {
twl = tw_2msl_list[i];
tw_tail = &twl->tw_tail;
tw = LIST_FIRST(&twl->tw_list);
if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
continue;
INP_LOCK(tw->tw_inpcb);
tcp_twclose(tw, reuse);
return (reuse ? tw : NULL);
for (;;) {
tw = LIST_FIRST(&twl->tw_list);
if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
break;
INP_LOCK(tw->tw_inpcb);
tcp_twclose(tw, reuse);
if (reuse)
return (tw);
}
}
return (NULL);
}

View File

@ -1808,6 +1808,7 @@ tcp_twstart(struct tcpcb *tp)
INP_UNLOCK(inp);
}
#if 0
/*
* The approximate rate of ISN increase of Microsoft TCP stacks;
* the actual rate is slightly higher due to the addition of
@ -1822,10 +1823,6 @@ tcp_twstart(struct tcpcb *tp)
* Determine if the ISN we will generate has advanced beyond the last
* sequence number used by the previous connection. If so, indicate
* that it is safe to recycle this tw socket by returning 1.
*
* XXXRW: This function should assert the inpcb lock as it does multiple
* non-atomic reads from the tcptw, but is currently called without it from
* in_pcb.c:in_pcblookup_local().
*/
int
tcp_twrecycleable(struct tcptw *tw)
@ -1833,6 +1830,7 @@ tcp_twrecycleable(struct tcptw *tw)
tcp_seq new_iss = tw->iss;
tcp_seq new_irs = tw->irs;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
@ -1841,6 +1839,7 @@ tcp_twrecycleable(struct tcptw *tw)
else
return (0);
}
#endif
void
tcp_twclose(struct tcptw *tw, int reuse)

View File

@ -494,7 +494,9 @@ struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
#if 0
int tcp_twrecycleable(struct tcptw *tw);
#endif
void tcp_twclose(struct tcptw *_tw, int _reuse);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);