In tcp_input(), don't acquire the pcbinfo global write lock for SYN
packets targeting a listening socket. This reduces TCP input processing starvation under high SYN load (e.g. many short-lived TCP connections or a SYN flood). Submitted by: Julien Charbon <jcharbon@verisign.com> Reviewed by: adrian, hiren, jhb, Mike Bentkofsky
This commit is contained in:
parent
9908eab82e
commit
a7c7f2a7e2
@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Locate pcb for segment; if we're likely to add or remove a
|
* Locate pcb for segment; if we're likely to add or remove a
|
||||||
* connection then first acquire pcbinfo lock. There are two cases
|
* connection then first acquire pcbinfo lock. There are three cases
|
||||||
* where we might discover later we need a write lock despite the
|
* where we might discover later we need a write lock despite the
|
||||||
* flags: ACKs moving a connection out of the syncache, and ACKs for
|
* flags: ACKs moving a connection out of the syncache, ACKs for a
|
||||||
* a connection in TIMEWAIT.
|
* connection in TIMEWAIT and SYNs not targeting a listening socket.
|
||||||
*/
|
*/
|
||||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
|
if ((thflags & (TH_FIN | TH_RST)) != 0) {
|
||||||
INP_INFO_WLOCK(&V_tcbinfo);
|
INP_INFO_WLOCK(&V_tcbinfo);
|
||||||
ti_locked = TI_WLOCKED;
|
ti_locked = TI_WLOCKED;
|
||||||
} else
|
} else
|
||||||
@ -982,10 +982,11 @@ relocked:
|
|||||||
* now be in TIMEWAIT.
|
* now be in TIMEWAIT.
|
||||||
*/
|
*/
|
||||||
#ifdef INVARIANTS
|
#ifdef INVARIANTS
|
||||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
|
if ((thflags & (TH_FIN | TH_RST)) != 0)
|
||||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||||
#endif
|
#endif
|
||||||
if (tp->t_state != TCPS_ESTABLISHED) {
|
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
|
||||||
|
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
|
||||||
if (ti_locked == TI_UNLOCKED) {
|
if (ti_locked == TI_UNLOCKED) {
|
||||||
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
|
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
|
||||||
in_pcbref(inp);
|
in_pcbref(inp);
|
||||||
@ -1026,17 +1027,13 @@ relocked:
|
|||||||
/*
|
/*
|
||||||
* When the socket is accepting connections (the INPCB is in LISTEN
|
* When the socket is accepting connections (the INPCB is in LISTEN
|
||||||
* state) we look into the SYN cache if this is a new connection
|
* state) we look into the SYN cache if this is a new connection
|
||||||
* attempt or the completion of a previous one. Because listen
|
* attempt or the completion of a previous one.
|
||||||
* sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
|
|
||||||
* held in this case.
|
|
||||||
*/
|
*/
|
||||||
if (so->so_options & SO_ACCEPTCONN) {
|
if (so->so_options & SO_ACCEPTCONN) {
|
||||||
struct in_conninfo inc;
|
struct in_conninfo inc;
|
||||||
|
|
||||||
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
|
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
|
||||||
"tp not listening", __func__));
|
"tp not listening", __func__));
|
||||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
|
||||||
|
|
||||||
bzero(&inc, sizeof(inc));
|
bzero(&inc, sizeof(inc));
|
||||||
#ifdef INET6
|
#ifdef INET6
|
||||||
if (isipv6) {
|
if (isipv6) {
|
||||||
@ -1059,6 +1056,8 @@ relocked:
|
|||||||
* socket appended to the listen queue in SYN_RECEIVED state.
|
* socket appended to the listen queue in SYN_RECEIVED state.
|
||||||
*/
|
*/
|
||||||
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
|
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
|
||||||
|
|
||||||
|
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
||||||
/*
|
/*
|
||||||
* Parse the TCP options here because
|
* Parse the TCP options here because
|
||||||
* syncookies need access to the reflected
|
* syncookies need access to the reflected
|
||||||
@ -1339,8 +1338,12 @@ relocked:
|
|||||||
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
|
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
|
||||||
/*
|
/*
|
||||||
* Entry added to syncache and mbuf consumed.
|
* Entry added to syncache and mbuf consumed.
|
||||||
* Everything already unlocked by syncache_add().
|
* Only the listen socket is unlocked by syncache_add().
|
||||||
*/
|
*/
|
||||||
|
if (ti_locked == TI_WLOCKED) {
|
||||||
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||||
|
ti_locked = TI_UNLOCKED;
|
||||||
|
}
|
||||||
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
|
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
} else if (tp->t_state == TCPS_LISTEN) {
|
} else if (tp->t_state == TCPS_LISTEN) {
|
||||||
|
@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
|
|||||||
struct syncache scs;
|
struct syncache scs;
|
||||||
struct ucred *cred;
|
struct ucred *cred;
|
||||||
|
|
||||||
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
|
|
||||||
INP_WLOCK_ASSERT(inp); /* listen socket */
|
INP_WLOCK_ASSERT(inp); /* listen socket */
|
||||||
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
|
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
|
||||||
("%s: unexpected tcp flags", __func__));
|
("%s: unexpected tcp flags", __func__));
|
||||||
@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
|
|||||||
#ifdef MAC
|
#ifdef MAC
|
||||||
if (mac_syncache_init(&maclabel) != 0) {
|
if (mac_syncache_init(&maclabel) != 0) {
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
goto done;
|
goto done;
|
||||||
} else
|
} else
|
||||||
mac_syncache_create(maclabel, inp);
|
mac_syncache_create(maclabel, inp);
|
||||||
#endif
|
#endif
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remember the IP options, if any.
|
* Remember the IP options, if any.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user