Add locking to TCP syncache and drop the global tcpinfo lock as early

as possible for the syncache_add() case.  The syncache timer no longer
acquires the tcpinfo lock and timeout/retransmit runs can happen in
parallel with bucket granularity.

On a P4 the additional locks cause a slight degradation of 0.7% in tcp
connections per second.  When IP and TCP input are deserialized and
can run in parallel this little overhead can be neglected. The syncookie
handling still leaves room for improvement and its random salts may be
moved to the syncache bucket head structures to remove the second lock
operation currently required for it.  However this would be a more
involved change from the way syncookies work at the moment.

Reviewed by:	rwatson
Tested by:	rwatson, ps (earlier version)
Sponsored by:	TCP/IP Optimization Fundraise 2005
This commit is contained in:
Andre Oppermann 2006-06-17 17:32:38 +00:00
parent 5630148ab8
commit 351630c40d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=159695
4 changed files with 313 additions and 273 deletions

View File

@ -973,18 +973,18 @@ tcp_input(m, off0)
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (!syncache_add(&inc, &to, th, inp, &so, m))
goto drop; /* XXX: does not happen */
if (so == NULL) {
/*
* Entry added to syncache, mbuf used to
* send SYN,ACK packet.
* send SYN,ACK packet. Everything unlocked
* already.
*/
KASSERT(headlocked, ("headlocked"));
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
panic("T/TCP not supported at the moment");
#if 0 /* T/TCP */
/*
* Segment passed TAO tests.
* XXX: Can't happen at the moment.
@ -1011,6 +1011,7 @@ tcp_input(m, off0)
tcpstat.tcps_connects++;
soisconnected(so);
goto trimthenstep6;
#endif /* T/TCP */
}
goto drop;
}
@ -1437,7 +1438,9 @@ tcp_input(m, off0)
tp->t_state = TCPS_SYN_RECEIVED;
}
#if 0 /* T/TCP */
trimthenstep6:
#endif
KASSERT(headlocked, ("tcp_input: trimthenstep6: head not "
"locked"));
INP_LOCK_ASSERT(inp);

View File

@ -973,18 +973,18 @@ tcp_input(m, off0)
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (!syncache_add(&inc, &to, th, inp, &so, m))
goto drop; /* XXX: does not happen */
if (so == NULL) {
/*
* Entry added to syncache, mbuf used to
* send SYN,ACK packet.
* send SYN,ACK packet. Everything unlocked
* already.
*/
KASSERT(headlocked, ("headlocked"));
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
panic("T/TCP not supported at the moment");
#if 0 /* T/TCP */
/*
* Segment passed TAO tests.
* XXX: Can't happen at the moment.
@ -1011,6 +1011,7 @@ tcp_input(m, off0)
tcpstat.tcps_connects++;
soisconnected(so);
goto trimthenstep6;
#endif /* T/TCP */
}
goto drop;
}
@ -1437,7 +1438,9 @@ tcp_input(m, off0)
tp->t_state = TCPS_SYN_RECEIVED;
}
#if 0 /* T/TCP */
trimthenstep6:
#endif
KASSERT(headlocked, ("tcp_input: trimthenstep6: head not "
"locked"));
INP_LOCK_ASSERT(inp);

File diff suppressed because it is too large Load Diff

View File

@ -245,8 +245,6 @@ struct tcpopt {
#ifdef _NETINET_IN_PCB_H_
struct syncache {
inp_gen_t sc_inp_gencnt; /* pointer check */
struct tcpcb *sc_tp; /* tcb for listening socket */
struct mbuf *sc_ipopts; /* source route */
struct in_conninfo sc_inc; /* addresses */
u_int32_t sc_tsrecent;
@ -254,9 +252,12 @@ struct syncache {
tcp_seq sc_irs; /* seq from peer */
tcp_seq sc_iss; /* our ISS */
u_long sc_rxttime; /* retransmit time */
u_int16_t sc_rxtslot; /* retransmit counter */
u_int16_t sc_rxmits; /* retransmit counter */
u_int16_t sc_peer_mss; /* peer's MSS */
u_int16_t sc_wnd; /* advertised window */
u_int8_t sc_ip_ttl; /* IPv4 TTL */
u_int8_t sc_ip_tos; /* IPv4 TOS */
u_int8_t sc_requested_s_scale:4,
sc_request_r_scale:4;
u_int8_t sc_flags;
@ -267,11 +268,13 @@ struct syncache {
#define SCF_SIGNATURE 0x20 /* send MD5 digests */
#define SCF_SACK 0x80 /* send SACK option */
TAILQ_ENTRY(syncache) sc_hash;
TAILQ_ENTRY(syncache) sc_timerq;
};
struct syncache_head {
TAILQ_HEAD(, syncache) sch_bucket;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct mtx sch_mtx;
struct callout sch_timer;
int sch_nextc;
u_int sch_length;
};
#else
@ -563,7 +566,7 @@ void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcphdr *,
struct socket **, struct mbuf *);
int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
/*