tcp: utilize new solisten_clone() and solisten_enqueue()

This streamlines cloning of a socket from a listener.  Now we do not
drop the inpcb lock during creation of a new socket, do not do useless
state transitions, and put a fully initialized socket+inpcb+tcpcb into
the listen queue.

Before this change, we would first allocate the socket and inpcb+tcpcb via
tcp_usr_attach() as TCPS_CLOSED, link them into the global list of pcbs,
unlock the pcb and put the socket onto the incomplete queue (see
6f3caa6d81).  Then, after sonewconn(), we would lock it again, transition
into TCPS_SYN_RECEIVED, insert it into the inpcb hash, and finalize
initialization of the tcpcb.  After that, we would call into
tcp_do_segment() and, upon transition to TCPS_ESTABLISHED, call
soisconnected().  This call would lock the listening socket once again
with a LOR protection sequence, and then we would relocate the socket onto
the complete queue; only then would it be ready for accept(2).

Reviewed by:		rrs, tuexen
Differential revision:	https://reviews.freebsd.org/D36064
This commit is contained in:
Gleb Smirnoff 2022-08-10 11:09:34 -07:00
parent 8f5a0a2e4f
commit 07285bb4c2
4 changed files with 64 additions and 77 deletions

View File

@ -455,7 +455,7 @@ soalloc(struct vnet *vnet)
* locks, labels, etc. All protocol state is assumed already to have been
* torn down (and possibly never set up) by the caller.
*/
static void
void
sodealloc(struct socket *so)
{

View File

@ -804,44 +804,25 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
/*
* Ok, create the full blown connection, and set things up
* as they would have been set up if we had created the
* connection when the SYN arrived. If we can't create
* the connection, abort it.
* connection when the SYN arrived.
*/
so = sonewconn(lso, 0);
if (so == NULL) {
/*
* Drop the connection; we will either send a RST or
* have the peer retransmit its SYN again after its
* RTO and try again.
*/
TCPSTAT_INC(tcps_listendrop);
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Socket create failed "
"due to limits or memory shortage\n",
s, __func__);
free(s, M_TCPLOG);
}
goto abort2;
}
if ((so = solisten_clone(lso)) == NULL)
goto allocfail;
#ifdef MAC
mac_socketpeer_set_from_mbuf(m, so);
#endif
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
sodealloc(so);
goto allocfail;
}
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
/*
* Exclusive pcbinfo lock is not required in syncache socket case even
* if two inpcb locks can be acquired simultaneously:
* - the inpcb in LISTEN state,
* - the newly created inp.
*
* In this case, an inp cannot be at same time in LISTEN state and
* just created by an accept() call.
*/
INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
if ((tp = tcp_newtcpcb(inp)) == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
sodealloc(so);
goto allocfail;
}
inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
@ -904,16 +885,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr6 = inp->in6p_laddr;
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = sc->sc_inc.inc6_laddr;
if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
thread0.td_ucred, m, false)) != 0) {
INP_HASH_WLOCK(&V_tcbinfo);
error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
thread0.td_ucred, m, false);
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error != 0) {
inp->in6p_laddr = laddr6;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
"with error %i\n",
s, __func__, error);
free(s, M_TCPLOG);
}
INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
/* Override flowlabel from in6_pcbconnect. */
@ -944,16 +921,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr = inp->inp_laddr;
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = sc->sc_inc.inc_laddr;
if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
thread0.td_ucred, false)) != 0) {
INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
thread0.td_ucred, false);
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error != 0) {
inp->inp_laddr = laddr;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
"with error %i\n",
s, __func__, error);
free(s, M_TCPLOG);
}
INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
}
@ -963,9 +936,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0)
printf("syncache_socket: could not copy policy\n");
#endif
INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
tp->t_port = sc->sc_port;
@ -1066,13 +1037,37 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
TCPSTAT_INC(tcps_accepts);
TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, TCPS_LISTEN);
solisten_enqueue(so, SS_ISCONNECTED);
return (so);
allocfail:
/*
* Drop the connection; we will either send a RST or have the peer
* retransmit its SYN again after its RTO and try again.
*/
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Socket create failed "
"due to limits or memory shortage\n",
s, __func__);
free(s, M_TCPLOG);
}
TCPSTAT_INC(tcps_listendrop);
return (NULL);
abort:
INP_WUNLOCK(inp);
abort2:
if (so != NULL)
soabort(so);
in_pcbdetach(inp);
in_pcbfree(inp);
sodealloc(so);
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in%s_pcbconnect failed with error %i\n",
s, __func__, (sc->sc_inc.inc_flags & INC_ISIPV6) ? "6" : "",
error);
free(s, M_TCPLOG);
}
TCPSTAT_INC(tcps_listendrop);
return (NULL);
}
@ -1176,6 +1171,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
return (-1); /* Do not send RST */
}
#endif /* TCP_SIGNATURE */
TCPSTATES_INC(TCPS_SYN_RECEIVED);
} else {
if (sc->sc_port != port) {
SCH_UNLOCK(sch);
@ -1282,17 +1278,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
return (-1); /* Do not send RST */
}
}
/*
* Pull out the entry to unlock the bucket row.
*
* NOTE: We must decrease TCPS_SYN_RECEIVED count here, not
* tcp_state_change(). The tcpcb is not existent at this
* moment. A new one will be allocated via syncache_socket->
* sonewconn->tcp_usr_attach in TCPS_CLOSED state, then
* syncache_socket() will change it to TCPS_SYN_RECEIVED.
*/
TCPSTATES_DEC(TCPS_SYN_RECEIVED);
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
#ifdef TCP_OFFLOAD
@ -1340,8 +1325,11 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
syncache_free(sc);
return (1);
failed:
if (sc != NULL && sc != &scs)
syncache_free(sc);
if (sc != NULL) {
TCPSTATES_DEC(TCPS_SYN_RECEIVED);
if (sc != &scs)
syncache_free(sc);
}
if (s != NULL)
free(s, M_TCPLOG);
*lsop = NULL;

View File

@ -172,11 +172,9 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
TCPDEBUG1();
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
if (error)
goto out;
}
error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
if (error)
goto out;
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;

View File

@ -498,6 +498,7 @@ int soreceive_generic(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
void sorele_locked(struct socket *so);
void sodealloc(struct socket *);
int soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
void sorflush(struct socket *so);
int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,