Now that tcp_input() and all of its branches are executed in the network epoch, we can greatly simplify synchronization. Remove all unnecessary epoch enters hidden under the INP_INFO_RLOCK macro. Remove some unnecessary assertions and convert the necessary ones into the NET_EPOCH_ASSERT macro.
commit d40c0d47cd
parent bff7f83d39
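
For context, here is a minimal sketch (not part of this diff) of the pattern the commit relies on: the caller of tcp_input() already runs inside the network epoch, so the per-branch INP_INFO_RLOCK_ET()/INP_INFO_RUNLOCK_ET() pairs collapse into a single NET_EPOCH_ASSERT() at function entry. The caller shown below is hypothetical, and the exact header providing the NET_EPOCH_* macros may differ between FreeBSD versions.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/epoch.h>		/* struct epoch_tracker */
#include <net/if_var.h>		/* NET_EPOCH_ENTER/EXIT/ASSERT (assumed location) */

/*
 * Hypothetical caller: the protocol input path enters the network epoch
 * once, the way the netisr/ip_input path already does, before any TCP
 * processing runs.
 */
static void
example_deliver(struct mbuf *m)
{
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	/* ... ip_input(m) eventually reaches tcp_input(), still in the epoch ... */
	NET_EPOCH_EXIT(et);
	(void)m;
}

/*
 * Inside tcp_input() and the per-stack handlers, the old pattern
 *
 *	INP_INFO_RLOCK_ET(&V_tcbinfo, et);
 *	...
 *	INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 *
 * therefore becomes a single assertion:
 *
 *	NET_EPOCH_ASSERT();
 */
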
@@ -561,7 +561,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int rstreason = 0;	/* For badport_bandlim accounting purposes */
uint8_t iptos;
struct m_tag *fwd_tag = NULL;
struct epoch_tracker et;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
@@ -570,7 +569,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
#endif /* INET6 */
struct tcpopt to;		/* options in this segment */
char *s = NULL;			/* address and port logging */
int ti_locked;
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -581,6 +579,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
short ostate = 0;
#endif

NET_EPOCH_ASSERT();

#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
@@ -746,19 +746,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
*/
drop_hdrlen = off0 + off;

/*
* Locate pcb for segment; if we're likely to add or remove a
* connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
* flags: ACKs moving a connection out of the syncache, ACKs for a
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_FIN | TH_RST)) != 0) {
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;

/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
@@ -776,13 +763,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);

findpcb:
#ifdef INVARIANTS
if (ti_locked == TI_RLOCKED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif
#ifdef INET6
if (isipv6 && fwd_tag != NULL) {
struct sockaddr_in6 *next_hop6;
@@ -942,12 +922,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* XXXRW: It may be time to rethink timewait locking.
*/
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
/*
@@ -955,7 +929,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -977,27 +950,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
}
#endif

/*
* We've identified a valid inpcb, but it could be that we need an
* inpcbinfo write lock but don't hold it. In this case, attempt to
* acquire using the same strategy as the TIMEWAIT case above. If we
* relock, we have to jump back to 'relocked' as the connection might
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
if ((thflags & (TH_FIN | TH_RST)) != 0)
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) {
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}

#ifdef MAC
INP_WLOCK_ASSERT(inp);
if (mac_inpcb_check_deliver(inp, m))
@@ -1052,7 +1004,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1130,8 +1081,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
TCP_PROBE5(receive, NULL, tp, m, tp, th);
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos);
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1334,10 +1283,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
*/
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
@@ -1370,25 +1315,11 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* the inpcb, and unlocks pcbinfo.
*/
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);

dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th);

if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
"ti_locked: %d", __func__, ti_locked));
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif

if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(inp);
@@ -1401,18 +1332,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
if (m != NULL)
TCP_PROBE5(receive, NULL, tp, m, tp, th);

if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
"ti_locked: %d", __func__, ti_locked));
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif

if (inp != NULL)
INP_WUNLOCK(inp);

@@ -1501,7 +1420,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct mbuf *mfree;
struct tcpopt to;
int tfo_syn;


#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1516,16 +1435,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
* allow the tcbinfo to be in either alocked or unlocked, as the
* caller may have unnecessarily acquired a write lock due to a race.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}

NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));
@@ -2047,7 +1958,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);

/*
@@ -2120,7 +2030,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -2163,8 +2072,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
@@ -2288,8 +2195,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
"after socket was closed, "
@@ -2875,7 +2780,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
case TCPS_CLOSING:
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return;
@@ -2890,7 +2794,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -3139,8 +3042,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* standard timers.
*/
case TCPS_FIN_WAIT_2:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

tcp_twstart(tp);
return;
}

@@ -8618,7 +8618,6 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
bbr->rc_timer_first = 1;
bbr_timer_cancel(bbr,
__LINE__, bbr->r_ctl.rc_rcvtime);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
tcp_twstart(tp);
return (1);
@@ -9619,7 +9618,6 @@ bbr_check_data_after_close(struct mbuf *m, struct tcp_bbr *bbr,
struct tcpcb *tp, int32_t * tlen, struct tcphdr *th, struct socket *so)
{

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (bbr->rc_allow_data_af_clo == 0) {
close_now:
tp = tcp_close(tp);
@@ -9861,7 +9859,6 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -9974,7 +9971,6 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);

@@ -5875,7 +5875,6 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
case TCPS_FIN_WAIT_2:
rack_timer_cancel(tp, rack,
rack->r_ctl.rc_rcvtime, __LINE__);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
return (1);
}
@@ -6353,7 +6352,6 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
* Advance th->th_seq to correspond to first data byte. If data,
@@ -6847,7 +6845,6 @@ rack_check_data_after_close(struct mbuf *m,
{
struct tcp_rack *rack;

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
rack = (struct tcp_rack *)tp->t_fb_ptr;
if (rack->rc_allow_data_af_clo == 0) {
close_now:
@@ -7079,7 +7076,6 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -7187,7 +7183,6 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);
@@ -7650,16 +7645,8 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
kern_prefetch(rack, &prev_state);
prev_state = 0;
thflags = th->th_flags;
/*
* If this is either a state-changing packet or current state isn't
* established, we require a read lock on tcbinfo. Otherwise, we
* allow the tcbinfo to be in either locked or unlocked, as the
* caller may have unnecessarily acquired a lock due to a race.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}

NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));

@@ -253,7 +253,6 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
*/
struct mbuf *m_save;
struct ether_header *eh;
struct epoch_tracker et;
struct tcphdr *th;
#ifdef INET6
struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
@@ -268,14 +267,8 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
uint16_t drop_hdrlen;
uint8_t iptos, no_vn=0, bpf_req=0;

/*
* This is a bit deceptive, we get the
* "info epoch" which is really the network
* epoch. This covers us on both any INP
* type change but also if the ifp goes
* away it covers us as well.
*/
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
NET_EPOCH_ASSERT();

if (m && m->m_pkthdr.rcvif)
ifp = m->m_pkthdr.rcvif;
else
@@ -445,7 +438,6 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
}
if (no_vn == 0)
CURVNET_RESTORE();
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}
skipped_pkt:
@@ -453,7 +445,6 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
}
if (no_vn == 0)
CURVNET_RESTORE();
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}

@@ -680,7 +671,6 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {

INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -732,7 +722,8 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
void
ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val)
{
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

NET_EPOCH_ASSERT();

TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&