In tcp_input(), don't acquire the pcbinfo global write lock for SYN
packets targeting a listening socket. This reduces TCP input processing starvation under high SYN load (e.g. many short-lived TCP connections or a SYN flood). Submitted by: Julien Charbon <jcharbon@verisign.com> Reviewed by: adrian, hiren, jhb, Mike Bentkofsky
This commit is contained in:
parent
9908eab82e
commit
a7c7f2a7e2
@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
||||
|
||||
/*
|
||||
* Locate pcb for segment; if we're likely to add or remove a
|
||||
* connection then first acquire pcbinfo lock. There are two cases
|
||||
* connection then first acquire pcbinfo lock. There are three cases
|
||||
* where we might discover later we need a write lock despite the
|
||||
* flags: ACKs moving a connection out of the syncache, and ACKs for
|
||||
* a connection in TIMEWAIT.
|
||||
* flags: ACKs moving a connection out of the syncache, ACKs for a
|
||||
* connection in TIMEWAIT and SYNs not targeting a listening socket.
|
||||
*/
|
||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
|
||||
if ((thflags & (TH_FIN | TH_RST)) != 0) {
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
ti_locked = TI_WLOCKED;
|
||||
} else
|
||||
@ -982,10 +982,11 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
||||
* now be in TIMEWAIT.
|
||||
*/
|
||||
#ifdef INVARIANTS
|
||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
|
||||
if ((thflags & (TH_FIN | TH_RST)) != 0)
|
||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||
#endif
|
||||
if (tp->t_state != TCPS_ESTABLISHED) {
|
||||
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
|
||||
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
|
||||
if (ti_locked == TI_UNLOCKED) {
|
||||
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
|
||||
in_pcbref(inp);
|
||||
@ -1026,17 +1027,13 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
||||
/*
|
||||
* When the socket is accepting connections (the INPCB is in LISTEN
|
||||
* state) we look into the SYN cache if this is a new connection
|
||||
* attempt or the completion of a previous one. Because listen
|
||||
* sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
|
||||
* held in this case.
|
||||
* attempt or the completion of a previous one.
|
||||
*/
|
||||
if (so->so_options & SO_ACCEPTCONN) {
|
||||
struct in_conninfo inc;
|
||||
|
||||
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
|
||||
"tp not listening", __func__));
|
||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||
|
||||
bzero(&inc, sizeof(inc));
|
||||
#ifdef INET6
|
||||
if (isipv6) {
|
||||
@ -1059,6 +1056,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
||||
* socket appended to the listen queue in SYN_RECEIVED state.
|
||||
*/
|
||||
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
|
||||
|
||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||
/*
|
||||
* Parse the TCP options here because
|
||||
* syncookies need access to the reflected
|
||||
@ -1339,8 +1338,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
||||
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
|
||||
/*
|
||||
* Entry added to syncache and mbuf consumed.
|
||||
* Everything already unlocked by syncache_add().
|
||||
* Only the listen socket is unlocked by syncache_add().
|
||||
*/
|
||||
if (ti_locked == TI_WLOCKED) {
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
ti_locked = TI_UNLOCKED;
|
||||
}
|
||||
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
|
||||
return (IPPROTO_DONE);
|
||||
} else if (tp->t_state == TCPS_LISTEN) {
|
||||
|
@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
|
||||
struct syncache scs;
|
||||
struct ucred *cred;
|
||||
|
||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||
INP_WLOCK_ASSERT(inp); /* listen socket */
|
||||
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
|
||||
("%s: unexpected tcp flags", __func__));
|
||||
@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
|
||||
#ifdef MAC
|
||||
if (mac_syncache_init(&maclabel) != 0) {
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
goto done;
|
||||
} else
|
||||
mac_syncache_create(maclabel, inp);
|
||||
#endif
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
|
||||
/*
|
||||
* Remember the IP options, if any.
|
||||
|
Loading…
Reference in New Issue
Block a user