In tcp_input(), don't acquire the pcbinfo global write lock for SYN

packets targeting a listening socket.  This reduces TCP input
processing starvation under high SYN load (e.g. short-lived TCP
connections or a SYN flood).

Submitted by:	Julien Charbon <jcharbon@verisign.com>
Reviewed by:	adrian, hiren, jhb, Mike Bentkofsky
This commit is contained in:
John Baldwin 2014-09-04 19:09:08 +00:00
parent 9908eab82e
commit a7c7f2a7e2
2 changed files with 15 additions and 15 deletions

View File

@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
/*
* Locate pcb for segment; if we're likely to add or remove a
* connection then first acquire pcbinfo lock. There are two cases
* connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
* flags: ACKs moving a connection out of the syncache, and ACKs for
* a connection in TIMEWAIT.
* flags: ACKs moving a connection out of the syncache, ACKs for a
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
if ((thflags & (TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
} else
@ -982,10 +982,11 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
if ((thflags & (TH_FIN | TH_RST)) != 0)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#endif
if (tp->t_state != TCPS_ESTABLISHED) {
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
if (ti_locked == TI_UNLOCKED) {
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
@ -1026,17 +1027,13 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
* attempt or the completion of a previous one. Because listen
* sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
* held in this case.
* attempt or the completion of a previous one.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
bzero(&inc, sizeof(inc));
#ifdef INET6
if (isipv6) {
@ -1059,6 +1056,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* socket appended to the listen queue in SYN_RECEIVED state.
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@ -1339,8 +1338,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
/*
* Entry added to syncache and mbuf consumed.
* Everything already unlocked by syncache_add().
* Only the listen socket is unlocked by syncache_add().
*/
if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {

View File

@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct syncache scs;
struct ucred *cred;
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
("%s: unexpected tcp flags", __func__));
@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#ifdef MAC
if (mac_syncache_init(&maclabel) != 0) {
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
goto done;
} else
mac_syncache_create(maclabel, inp);
#endif
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
/*
* Remember the IP options, if any.