epoch(9): allow preemptible epochs to compose

- Add tracker argument to preemptible epochs
- Inline epoch read path in kernel and tied modules
- Change in_epoch to take an epoch as argument
- Simplify tfb_tcp_do_segment to not take a ti_locked argument,
  there's no longer any benefit to dropping the pcbinfo lock
  and trying to do so just adds an error prone branchfest to
  these functions
- Remove cases of same function recursion on the epoch as
  recursing is no longer free.
- Remove the TAILQ_ENTRY and epoch_section from struct
  thread as the tracker field is now stack or heap allocated
  as appropriate.

Tested by: pho and Limelight Networks
Reviewed by: kbowling at llnw dot com
Sponsored by: Limelight Networks
Differential Revision: https://reviews.freebsd.org/D16066
This commit is contained in:
Matt Macy 2018-07-04 02:47:16 +00:00
parent e98bd75073
commit 6573d7580b
44 changed files with 800 additions and 870 deletions

View File

@ -343,6 +343,7 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct icl_cxgbei_pdu *icp = toep->ulpcb2; struct icl_cxgbei_pdu *icp = toep->ulpcb2;
struct icl_pdu *ip; struct icl_pdu *ip;
u_int pdu_len, val; u_int pdu_len, val;
struct epoch_tracker et;
MPASS(m == NULL); MPASS(m == NULL);
@ -411,12 +412,12 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET); tp = tcp_drop(tp, ECONNRESET);
if (tp) if (tp)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
icl_cxgbei_conn_pdu_free(NULL, ip); icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS #ifdef INVARIANTS

View File

@ -115,18 +115,19 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
struct toepcb *toep = lookup_atid(sc, atid); struct toepcb *toep = lookup_atid(sc, atid);
struct inpcb *inp = toep->inp; struct inpcb *inp = toep->inp;
struct toedev *tod = &toep->td->tod; struct toedev *tod = &toep->td->tod;
struct epoch_tracker et;
free_atid(sc, atid); free_atid(sc, atid);
toep->tid = -1; toep->tid = -1;
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
if (status != EAGAIN) if (status != EAGAIN)
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
toe_connect_failed(tod, inp, status); toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */ final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN) if (status != EAGAIN)
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
} }

View File

@ -1235,6 +1235,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct inpcb *inp = toep->inp; struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct socket *so; struct socket *so;
struct epoch_tracker et;
#ifdef INVARIANTS #ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif #endif
@ -1268,7 +1269,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tp = intotcpcb(inp); tp = intotcpcb(inp);
@ -1312,7 +1313,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
case TCPS_FIN_WAIT_2: case TCPS_FIN_WAIT_2:
tcp_twstart(tp); tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
INP_WLOCK(inp); INP_WLOCK(inp);
@ -1325,7 +1326,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
} }
done: done:
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);
} }
@ -1344,6 +1345,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
struct inpcb *inp = toep->inp; struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct socket *so = NULL; struct socket *so = NULL;
struct epoch_tracker et;
#ifdef INVARIANTS #ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif #endif
@ -1354,7 +1356,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tp = intotcpcb(inp); tp = intotcpcb(inp);
@ -1372,7 +1374,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
tcp_twstart(tp); tcp_twstart(tp);
release: release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
INP_WLOCK(inp); INP_WLOCK(inp);
@ -1397,7 +1399,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
} }
done: done:
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);
} }
@ -1452,6 +1454,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct sge_wrq *ofld_txq = toep->ofld_txq; struct sge_wrq *ofld_txq = toep->ofld_txq;
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp; struct tcpcb *tp;
struct epoch_tracker et;
#ifdef INVARIANTS #ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif #endif
@ -1473,7 +1476,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
inp = toep->inp; inp = toep->inp;
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for tcp_close */
INP_WLOCK(inp); INP_WLOCK(inp);
tp = intotcpcb(inp); tp = intotcpcb(inp);
@ -1507,7 +1510,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
final_cpl_received(toep); final_cpl_received(toep);
done: done:
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0); return (0);
@ -1560,6 +1563,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct tcpcb *tp; struct tcpcb *tp;
struct socket *so; struct socket *so;
struct sockbuf *sb; struct sockbuf *sb;
struct epoch_tracker et;
int len; int len;
uint32_t ddp_placed = 0; uint32_t ddp_placed = 0;
@ -1631,12 +1635,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET); tp = tcp_drop(tp, ECONNRESET);
if (tp) if (tp)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);

View File

@ -1255,6 +1255,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
int reject_reason, v, ntids; int reject_reason, v, ntids;
uint16_t vid; uint16_t vid;
u_int wnd; u_int wnd;
struct epoch_tracker et;
#ifdef INVARIANTS #ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif #endif
@ -1369,15 +1370,15 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
REJECT_PASS_ACCEPT(); REJECT_PASS_ACCEPT();
rpl = wrtod(wr); rpl = wrtod(wr);
INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for 4-tuple check */
/* Don't offload if the 4-tuple is already in use */ /* Don't offload if the 4-tuple is already in use */
if (toe_4tuple_check(&inc, &th, ifp) != 0) { if (toe_4tuple_check(&inc, &th, ifp) != 0) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
free(wr, M_CXGBE); free(wr, M_CXGBE);
REJECT_PASS_ACCEPT(); REJECT_PASS_ACCEPT();
} }
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
inp = lctx->inp; /* listening socket, not owned by TOE */ inp = lctx->inp; /* listening socket, not owned by TOE */
INP_WLOCK(inp); INP_WLOCK(inp);
@ -1574,6 +1575,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
struct tcpopt to; struct tcpopt to;
struct in_conninfo inc; struct in_conninfo inc;
struct toepcb *toep; struct toepcb *toep;
struct epoch_tracker et;
u_int txqid, rxqid; u_int txqid, rxqid;
#ifdef INVARIANTS #ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
@ -1587,7 +1589,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
CURVNET_SET(lctx->vnet); CURVNET_SET(lctx->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for syncache_expand */
INP_WLOCK(inp); INP_WLOCK(inp);
CTR6(KTR_CXGBE, CTR6(KTR_CXGBE,
@ -1603,7 +1605,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);
} }
@ -1629,7 +1631,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
*/ */
send_reset_synqe(TOEDEV(ifp), synqe); send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);
} }
@ -1695,7 +1697,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
inp = release_lctx(sc, lctx); inp = release_lctx(sc, lctx);
if (inp != NULL) if (inp != NULL)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
release_synqe(synqe); release_synqe(synqe);

View File

@ -1559,6 +1559,8 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
SOCKBUF_LOCK(sb); SOCKBUF_LOCK(sb);
if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
struct epoch_tracker et;
CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
__func__, tid, pdu_length); __func__, tid, pdu_length);
m_freem(m); m_freem(m);
@ -1566,12 +1568,12 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet); CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET); tp = tcp_drop(tp, ECONNRESET);
if (tp) if (tp)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return (0); return (0);

View File

@ -85,6 +85,9 @@ __FBSDID("$FreeBSD$");
#define free_domain(addr, type) free(addr, type) #define free_domain(addr, type) free(addr, type)
#endif #endif
#define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et)
#define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et)
/* /*
* Types * Types
*/ */
@ -1752,12 +1755,12 @@ pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
const struct pmc_process *pp; const struct pmc_process *pp;
freepath = fullpath = NULL; freepath = fullpath = NULL;
MPASS(!in_epoch()); MPASS(!in_epoch(global_epoch_preempt));
pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
pid = td->td_proc->p_pid; pid = td->td_proc->p_pid;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
/* Inform owners of all system-wide sampling PMCs. */ /* Inform owners of all system-wide sampling PMCs. */
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
@ -1778,7 +1781,7 @@ pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
done: done:
if (freepath) if (freepath)
free(freepath, M_TEMP); free(freepath, M_TEMP);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
} }
@ -1797,12 +1800,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
pid = td->td_proc->p_pid; pid = td->td_proc->p_pid;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, pid, pkm->pm_address, pmclog_process_map_out(po, pid, pkm->pm_address,
pkm->pm_address + pkm->pm_size); pkm->pm_address + pkm->pm_size);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
return; return;
@ -1824,7 +1827,7 @@ pmc_log_kernel_mappings(struct pmc *pm)
struct pmc_owner *po; struct pmc_owner *po;
struct pmckern_map_in *km, *kmbase; struct pmckern_map_in *km, *kmbase;
MPASS(in_epoch() || sx_xlocked(&pmc_sx)); MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
("[pmc,%d] non-sampling PMC (%p) desires mapping information", ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
__LINE__, (void *) pm)); __LINE__, (void *) pm));
@ -2106,13 +2109,13 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
pk = (struct pmckern_procexec *) arg; pk = (struct pmckern_procexec *) arg;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
/* Inform owners of SS mode PMCs of the exec event. */ /* Inform owners of SS mode PMCs of the exec event. */
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_procexec(po, PMC_ID_INVALID, pmclog_process_procexec(po, PMC_ID_INVALID,
p->p_pid, pk->pm_entryaddr, fullpath); p->p_pid, pk->pm_entryaddr, fullpath);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
PROC_LOCK(p); PROC_LOCK(p);
is_using_hwpmcs = p->p_flag & P_HWPMC; is_using_hwpmcs = p->p_flag & P_HWPMC;
@ -2242,7 +2245,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
break; break;
case PMC_FN_MUNMAP: case PMC_FN_MUNMAP:
MPASS(in_epoch() || sx_xlocked(&pmc_sx)); MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
pmc_process_munmap(td, (struct pmckern_map_out *) arg); pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break; break;
@ -2479,7 +2482,7 @@ pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td,
if (mode & PMC_FLAG_ALLOCATE) { if (mode & PMC_FLAG_ALLOCATE) {
if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) { if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) {
wait_flag = M_WAITOK; wait_flag = M_WAITOK;
if ((mode & PMC_FLAG_NOWAIT) || in_epoch()) if ((mode & PMC_FLAG_NOWAIT) || in_epoch(global_epoch_preempt))
wait_flag = M_NOWAIT; wait_flag = M_NOWAIT;
ptnew = malloc(THREADENTRY_SIZE, M_PMC, ptnew = malloc(THREADENTRY_SIZE, M_PMC,
@ -5070,11 +5073,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
/* /*
* Log a sysexit event to all SS PMC owners. * Log a sysexit event to all SS PMC owners.
*/ */
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_sysexit(po, p->p_pid); pmclog_process_sysexit(po, p->p_pid);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
if (!is_using_hwpmcs) if (!is_using_hwpmcs)
return; return;
@ -5255,13 +5258,13 @@ pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
* If there are system-wide sampling PMCs active, we need to * If there are system-wide sampling PMCs active, we need to
* log all fork events to their owner's logs. * log all fork events to their owner's logs.
*/ */
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) { if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
pmclog_process_proccreate(po, newproc, 1); pmclog_process_proccreate(po, newproc, 1);
} }
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
if (!is_using_hwpmcs) if (!is_using_hwpmcs)
return; return;
@ -5327,11 +5330,11 @@ pmc_process_threadcreate(struct thread *td)
{ {
struct pmc_owner *po; struct pmc_owner *po;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_threadcreate(po, td, 1); pmclog_process_threadcreate(po, td, 1);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
} }
static void static void
@ -5339,11 +5342,11 @@ pmc_process_threadexit(struct thread *td)
{ {
struct pmc_owner *po; struct pmc_owner *po;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_threadexit(po, td); pmclog_process_threadexit(po, td);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
} }
static void static void
@ -5351,11 +5354,11 @@ pmc_process_proccreate(struct proc *p)
{ {
struct pmc_owner *po; struct pmc_owner *po;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_proccreate(po, p, 1 /* sync */); pmclog_process_proccreate(po, p, 1 /* sync */);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
} }
static void static void
@ -5388,12 +5391,12 @@ pmc_kld_load(void *arg __unused, linker_file_t lf)
/* /*
* Notify owners of system sampling PMCs about KLD operations. * Notify owners of system sampling PMCs about KLD operations.
*/ */
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, (pid_t) -1, pmclog_process_map_in(po, (pid_t) -1,
(uintfptr_t) lf->address, lf->filename); (uintfptr_t) lf->address, lf->filename);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
/* /*
* TODO: Notify owners of (all) process-sampling PMCs too. * TODO: Notify owners of (all) process-sampling PMCs too.
@ -5406,12 +5409,12 @@ pmc_kld_unload(void *arg __unused, const char *filename __unused,
{ {
struct pmc_owner *po; struct pmc_owner *po;
epoch_enter_preempt(global_epoch_preempt); PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1, pmclog_process_map_out(po, (pid_t) -1,
(uintfptr_t) address, (uintfptr_t) address + size); (uintfptr_t) address, (uintfptr_t) address + size);
epoch_exit_preempt(global_epoch_preempt); PMC_EPOCH_EXIT();
/* /*
* TODO: Notify owners of process-sampling PMCs. * TODO: Notify owners of process-sampling PMCs.

View File

@ -58,18 +58,10 @@ static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
#define MAX_ADAPTIVE_SPIN 1000 #define MAX_ADAPTIVE_SPIN 1000
#define MAX_EPOCHS 64 #define MAX_EPOCHS 64
#ifdef __amd64__
#define EPOCH_ALIGN CACHE_LINE_SIZE*2
#else
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif
CTASSERT(sizeof(epoch_section_t) == sizeof(ck_epoch_section_t));
CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context)); CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information"); SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information");
SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats"); SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats");
/* Stats. */ /* Stats. */
static counter_u64_t block_count; static counter_u64_t block_count;
@ -100,26 +92,8 @@ TAILQ_HEAD (threadlist, thread);
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
ck_epoch_entry_container) ck_epoch_entry_container)
typedef struct epoch_record {
ck_epoch_record_t er_record;
volatile struct threadlist er_tdlist;
volatile uint32_t er_gen;
uint32_t er_cpuid;
} *epoch_record_t;
struct epoch_pcpu_state { epoch_t allepochs[MAX_EPOCHS];
struct epoch_record eps_record;
} __aligned(EPOCH_ALIGN);
struct epoch {
struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
struct epoch_pcpu_state *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
int e_idx;
int e_flags;
struct epoch_pcpu_state *e_pcpu[0];
};
epoch_t allepochs[MAX_EPOCHS];
DPCPU_DEFINE(struct grouptask, epoch_cb_task); DPCPU_DEFINE(struct grouptask, epoch_cb_task);
DPCPU_DEFINE(int, epoch_cb_count); DPCPU_DEFINE(int, epoch_cb_count);
@ -192,17 +166,15 @@ static void
epoch_init_numa(epoch_t epoch) epoch_init_numa(epoch_t epoch)
{ {
int domain, cpu_offset; int domain, cpu_offset;
struct epoch_pcpu_state *eps;
epoch_record_t er; epoch_record_t er;
for (domain = 0; domain < vm_ndomains; domain++) { for (domain = 0; domain < vm_ndomains; domain++) {
eps = malloc_domain(sizeof(*eps) * domcount[domain], M_EPOCH, er = malloc_domain(sizeof(*er) * domcount[domain], M_EPOCH,
domain, M_ZERO | M_WAITOK); domain, M_ZERO | M_WAITOK);
epoch->e_pcpu_dom[domain] = eps; epoch->e_pcpu_dom[domain] = er;
cpu_offset = domoffsets[domain]; cpu_offset = domoffsets[domain];
for (int i = 0; i < domcount[domain]; i++, eps++) { for (int i = 0; i < domcount[domain]; i++, er++) {
epoch->e_pcpu[cpu_offset + i] = eps; epoch->e_pcpu[cpu_offset + i] = er;
er = &eps->eps_record;
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL); ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
er->er_cpuid = cpu_offset + i; er->er_cpuid = cpu_offset + i;
@ -213,14 +185,12 @@ epoch_init_numa(epoch_t epoch)
static void static void
epoch_init_legacy(epoch_t epoch) epoch_init_legacy(epoch_t epoch)
{ {
struct epoch_pcpu_state *eps;
epoch_record_t er; epoch_record_t er;
eps = malloc(sizeof(*eps) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK); er = malloc(sizeof(*er) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK);
epoch->e_pcpu_dom[0] = eps; epoch->e_pcpu_dom[0] = er;
for (int i = 0; i < mp_ncpus; i++, eps++) { for (int i = 0; i < mp_ncpus; i++, er++) {
epoch->e_pcpu[i] = eps; epoch->e_pcpu[i] = er;
er = &eps->eps_record;
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL); ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
er->er_cpuid = i; er->er_cpuid = i;
@ -253,12 +223,12 @@ epoch_free(epoch_t epoch)
{ {
int domain; int domain;
#ifdef INVARIANTS #ifdef INVARIANTS
struct epoch_pcpu_state *eps; struct epoch_record *er;
int cpu; int cpu;
CPU_FOREACH(cpu) { CPU_FOREACH(cpu) {
eps = epoch->e_pcpu[cpu]; er = epoch->e_pcpu[cpu];
MPASS(TAILQ_EMPTY(&eps->eps_record.er_tdlist)); MPASS(TAILQ_EMPTY(&er->er_tdlist));
} }
#endif #endif
allepochs[epoch->e_idx] = NULL; allepochs[epoch->e_idx] = NULL;
@ -271,95 +241,32 @@ epoch_free(epoch_t epoch)
free(epoch, M_EPOCH); free(epoch, M_EPOCH);
} }
#define INIT_CHECK(epoch) \
do { \
if (__predict_false((epoch) == NULL)) \
return; \
} while (0)
void void
epoch_enter_preempt_internal(epoch_t epoch, struct thread *td) epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
{ {
struct epoch_pcpu_state *eps;
MPASS(cold || epoch != NULL); epoch_enter_preempt(epoch, et);
INIT_CHECK(epoch);
MPASS(epoch->e_flags & EPOCH_PREEMPT);
critical_enter();
td->td_pre_epoch_prio = td->td_priority;
eps = epoch->e_pcpu[curcpu];
#ifdef INVARIANTS
MPASS(td->td_epochnest < UCHAR_MAX - 2);
if (td->td_epochnest > 1) {
struct thread *curtd;
int found = 0;
TAILQ_FOREACH(curtd, &eps->eps_record.er_tdlist, td_epochq)
if (curtd == td)
found = 1;
KASSERT(found, ("recursing on a second epoch"));
critical_exit();
return;
}
#endif
TAILQ_INSERT_TAIL(&eps->eps_record.er_tdlist, td, td_epochq);
sched_pin();
ck_epoch_begin(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
critical_exit();
}
void
epoch_enter(epoch_t epoch)
{
ck_epoch_record_t *record;
struct thread *td;
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
td = curthread;
critical_enter();
td->td_epochnest++;
record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
ck_epoch_begin(record, NULL);
} }
void void
epoch_exit_preempt_internal(epoch_t epoch, struct thread *td) epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
{ {
struct epoch_pcpu_state *eps;
MPASS(td->td_epochnest == 0); epoch_exit_preempt(epoch, et);
INIT_CHECK(epoch);
critical_enter();
eps = epoch->e_pcpu[curcpu];
MPASS(epoch->e_flags & EPOCH_PREEMPT);
ck_epoch_end(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
TAILQ_REMOVE(&eps->eps_record.er_tdlist, td, td_epochq);
eps->eps_record.er_gen++;
sched_unpin();
if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) {
thread_lock(td);
sched_prio(td, td->td_pre_epoch_prio);
thread_unlock(td);
}
critical_exit();
} }
void void
epoch_exit(epoch_t epoch) epoch_enter_KBI(epoch_t epoch)
{ {
ck_epoch_record_t *record;
struct thread *td;
INIT_CHECK(epoch); epoch_enter(epoch);
td = curthread; }
td->td_epochnest--;
record = &epoch->e_pcpu[curcpu]->eps_record.er_record; void
ck_epoch_end(record, NULL); epoch_exit_KBI(epoch_t epoch)
critical_exit(); {
epoch_exit(epoch);
} }
/* /*
@ -371,7 +278,8 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
void *arg __unused) void *arg __unused)
{ {
epoch_record_t record; epoch_record_t record;
struct thread *td, *tdwait, *owner; struct thread *td, *owner, *curwaittd;
struct epoch_thread *tdwait;
struct turnstile *ts; struct turnstile *ts;
struct lock_object *lock; struct lock_object *lock;
int spincount, gen; int spincount, gen;
@ -389,13 +297,13 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
* overhead of a migration * overhead of a migration
*/ */
if ((tdwait = TAILQ_FIRST(&record->er_tdlist)) != NULL && if ((tdwait = TAILQ_FIRST(&record->er_tdlist)) != NULL &&
TD_IS_RUNNING(tdwait)) { TD_IS_RUNNING(tdwait->et_td)) {
gen = record->er_gen; gen = record->er_gen;
thread_unlock(td); thread_unlock(td);
do { do {
cpu_spinwait(); cpu_spinwait();
} while (tdwait == TAILQ_FIRST(&record->er_tdlist) && } while (tdwait == TAILQ_FIRST(&record->er_tdlist) &&
gen == record->er_gen && TD_IS_RUNNING(tdwait) && gen == record->er_gen && TD_IS_RUNNING(tdwait->et_td) &&
spincount++ < MAX_ADAPTIVE_SPIN); spincount++ < MAX_ADAPTIVE_SPIN);
thread_lock(td); thread_lock(td);
return; return;
@ -426,28 +334,29 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
* priority thread (highest prio value) and drop our priority * priority thread (highest prio value) and drop our priority
* to match to allow it to run. * to match to allow it to run.
*/ */
TAILQ_FOREACH(tdwait, &record->er_tdlist, td_epochq) { TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
/* /*
* Propagate our priority to any other waiters to prevent us * Propagate our priority to any other waiters to prevent us
* from starving them. They will have their original priority * from starving them. They will have their original priority
* restored on exit from epoch_wait(). * restored on exit from epoch_wait().
*/ */
if (!TD_IS_INHIBITED(tdwait) && tdwait->td_priority > td->td_priority) { curwaittd = tdwait->et_td;
if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
critical_enter(); critical_enter();
thread_unlock(td); thread_unlock(td);
thread_lock(tdwait); thread_lock(curwaittd);
sched_prio(tdwait, td->td_priority); sched_prio(curwaittd, td->td_priority);
thread_unlock(tdwait); thread_unlock(curwaittd);
thread_lock(td); thread_lock(td);
critical_exit(); critical_exit();
} }
if (TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait) && if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
((ts = tdwait->td_blocked) != NULL)) { ((ts = curwaittd->td_blocked) != NULL)) {
/* /*
* We unlock td to allow turnstile_wait to reacquire the * We unlock td to allow turnstile_wait to reacquire the
* thread lock. Before unlocking it we enter a critical * thread lock. Before unlocking it we enter a critical
* section to prevent preemption after we reenable interrupts * section to prevent preemption after we reenable interrupts
* by dropping the thread lock in order to prevent tdwait * by dropping the thread lock in order to prevent curwaittd
* from getting to run. * from getting to run.
*/ */
critical_enter(); critical_enter();
@ -456,15 +365,15 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
/* /*
* The owner pointer indicates that the lock succeeded. Only * The owner pointer indicates that the lock succeeded. Only
* in case we hold the lock and the turnstile we locked is still * in case we hold the lock and the turnstile we locked is still
* the one that tdwait is blocked on can we continue. Otherwise * the one that curwaittd is blocked on can we continue. Otherwise
* The turnstile pointer has been changed out from underneath * The turnstile pointer has been changed out from underneath
* us, as in the case where the lock holder has signalled tdwait, * us, as in the case where the lock holder has signalled curwaittd,
* and we need to continue. * and we need to continue.
*/ */
if (owner != NULL && ts == tdwait->td_blocked) { if (owner != NULL && ts == curwaittd->td_blocked) {
MPASS(TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait)); MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd));
critical_exit(); critical_exit();
turnstile_wait(ts, owner, tdwait->td_tsqueue); turnstile_wait(ts, owner, curwaittd->td_tsqueue);
counter_u64_add(turnstile_count, 1); counter_u64_add(turnstile_count, 1);
thread_lock(td); thread_lock(td);
return; return;
@ -569,7 +478,7 @@ epoch_wait(epoch_t epoch)
void void
epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)) epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t))
{ {
struct epoch_pcpu_state *eps; epoch_record_t er;
ck_epoch_entry_t *cb; ck_epoch_entry_t *cb;
cb = (void *)ctx; cb = (void *)ctx;
@ -585,8 +494,8 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t
critical_enter(); critical_enter();
*DPCPU_PTR(epoch_cb_count) += 1; *DPCPU_PTR(epoch_cb_count) += 1;
eps = epoch->e_pcpu[curcpu]; er = epoch->e_pcpu[curcpu];
ck_epoch_call(&eps->eps_record.er_record, cb, (ck_epoch_cb_t *)callback); ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
critical_exit(); critical_exit();
return; return;
boottime: boottime:
@ -608,7 +517,7 @@ epoch_call_task(void *arg __unused)
for (total = i = 0; i < epoch_count; i++) { for (total = i = 0; i < epoch_count; i++) {
if (__predict_false((epoch = allepochs[i]) == NULL)) if (__predict_false((epoch = allepochs[i]) == NULL))
continue; continue;
record = &epoch->e_pcpu[curcpu]->eps_record.er_record; record = &epoch->e_pcpu[curcpu]->er_record;
if ((npending = record->n_pending) == 0) if ((npending = record->n_pending) == 0)
continue; continue;
ck_epoch_poll_deferred(record, &cb_stack); ck_epoch_poll_deferred(record, &cb_stack);
@ -632,7 +541,47 @@ epoch_call_task(void *arg __unused)
} }
int int
in_epoch(void) in_epoch_verbose(epoch_t epoch, int dump_onfail)
{ {
return (curthread->td_epochnest != 0); struct epoch_thread *tdwait;
struct thread *td;
epoch_record_t er;
td = curthread;
if (td->td_epochnest == 0)
return (0);
if (__predict_false((epoch) == NULL))
return (0);
critical_enter();
er = epoch->e_pcpu[curcpu];
TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
if (tdwait->et_td == td) {
critical_exit();
return (1);
}
#ifdef INVARIANTS
if (dump_onfail) {
MPASS(td->td_pinned);
printf("cpu: %d id: %d\n", curcpu, td->td_tid);
TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
printf("td_tid: %d ", tdwait->et_td->td_tid);
printf("\n");
}
#endif
critical_exit();
return (0);
}
int
in_epoch(epoch_t epoch)
{
return (in_epoch_verbose(epoch, 0));
}
void
epoch_adjust_prio(struct thread *td, u_char prio)
{
thread_lock(td);
sched_prio(td, prio);
thread_unlock(td);
} }

View File

@ -1760,29 +1760,35 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd)
void void
if_addr_rlock(struct ifnet *ifp) if_addr_rlock(struct ifnet *ifp)
{ {
MPASS(*(uint64_t *)&ifp->if_addr_et == 0);
IF_ADDR_RLOCK(ifp); epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et);
} }
void void
if_addr_runlock(struct ifnet *ifp) if_addr_runlock(struct ifnet *ifp)
{ {
epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et);
IF_ADDR_RUNLOCK(ifp); #ifdef INVARIANTS
bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker));
#endif
} }
void void
if_maddr_rlock(if_t ifp) if_maddr_rlock(if_t ifp)
{ {
IF_ADDR_RLOCK((struct ifnet *)ifp); MPASS(*(uint64_t *)&ifp->if_maddr_et == 0);
epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et);
} }
void void
if_maddr_runlock(if_t ifp) if_maddr_runlock(if_t ifp)
{ {
IF_ADDR_RUNLOCK((struct ifnet *)ifp); epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et);
#ifdef INVARIANTS
bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker));
#endif
} }
/* /*
@ -1926,7 +1932,7 @@ ifa_ifwithaddr(const struct sockaddr *addr)
struct ifnet *ifp; struct ifnet *ifp;
struct ifaddr *ifa; struct ifaddr *ifa;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family) if (ifa->ifa_addr->sa_family != addr->sa_family)
@ -1969,7 +1975,7 @@ ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
struct ifnet *ifp; struct ifnet *ifp;
struct ifaddr *ifa; struct ifaddr *ifa;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
continue; continue;
@ -1999,7 +2005,7 @@ ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
struct ifnet *ifp; struct ifnet *ifp;
struct ifaddr *ifa; struct ifaddr *ifa;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0) if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue; continue;
@ -2032,7 +2038,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
u_int af = addr->sa_family; u_int af = addr->sa_family;
const char *addr_data = addr->sa_data, *cplim; const char *addr_data = addr->sa_data, *cplim;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
/* /*
* AF_LINK addresses can be looked up directly by their index number, * AF_LINK addresses can be looked up directly by their index number,
* so do that if we can. * so do that if we can.
@ -2069,7 +2075,6 @@ next: continue;
*/ */
if (ifa->ifa_dstaddr != NULL && if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) { sa_equal(addr, ifa->ifa_dstaddr)) {
IF_ADDR_RUNLOCK(ifp);
goto done; goto done;
} }
} else { } else {
@ -2128,7 +2133,8 @@ ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
if (af >= AF_MAX) if (af >= AF_MAX)
return (NULL); return (NULL);
MPASS(in_epoch());
MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != af) if (ifa->ifa_addr->sa_family != af)
continue; continue;

View File

@ -96,8 +96,8 @@ struct etherip_header {
/* mbuf adjust factor to force 32-bit alignment of IP header */ /* mbuf adjust factor to force 32-bit alignment of IP header */
#define ETHERIP_ALIGN 2 #define ETHERIP_ALIGN 2
#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define GIF_RLOCK() struct epoch_tracker gif_et; epoch_enter_preempt(net_epoch_preempt, &gif_et)
#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gif_et)
#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt) #define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt)
/* Prototypes */ /* Prototypes */

View File

@ -91,8 +91,8 @@ MALLOC_DECLARE(M_GRE);
#endif #endif
#define GRE2IFP(sc) ((sc)->gre_ifp) #define GRE2IFP(sc) ((sc)->gre_ifp)
#define GRE_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et)
#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et)
#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt) #define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define gre_hdr gre_uhdr.hdr #define gre_hdr gre_uhdr.hdr

View File

@ -73,10 +73,10 @@ __FBSDID("$FreeBSD$");
#include <net/if_lagg.h> #include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h> #include <net/ieee8023ad_lacp.h>
#define LAGG_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RLOCK_ASSERT() MPASS(in_epoch()) #define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch()) #define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt))
#define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx") #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx")
#define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx) #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx)
@ -1791,6 +1791,7 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{ {
struct lagg_port *lp_next, *rval = NULL; struct lagg_port *lp_next, *rval = NULL;
struct epoch_tracker net_et;
/* /*
* Search a port which reports an active link state. * Search a port which reports an active link state.
@ -1809,15 +1810,14 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
} }
search: search:
LAGG_RLOCK(); epoch_enter_preempt(net_epoch_preempt, &net_et);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) { if (LAGG_PORTACTIVE(lp_next)) {
LAGG_RUNLOCK(); epoch_exit_preempt(net_epoch_preempt, &net_et);
rval = lp_next; return (lp_next);
goto found;
} }
} }
LAGG_RUNLOCK(); epoch_exit_preempt(net_epoch_preempt, &net_et);
found: found:
return (rval); return (rval);
} }

View File

@ -87,8 +87,8 @@ struct me_softc {
CK_LIST_HEAD(me_list, me_softc); CK_LIST_HEAD(me_list, me_softc);
#define ME2IFP(sc) ((sc)->me_ifp) #define ME2IFP(sc) ((sc)->me_ifp)
#define ME_READY(sc) ((sc)->me_src.s_addr != 0) #define ME_READY(sc) ((sc)->me_src.s_addr != 0)
#define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define ME_RLOCK() struct epoch_tracker me_et; epoch_enter_preempt(net_epoch_preempt, &me_et)
#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &me_et)
#define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt)
#ifndef ME_HASH_SIZE #ifndef ME_HASH_SIZE
@ -315,7 +315,7 @@ me_lookup(const struct mbuf *m, int off, int proto, void **arg)
if (V_me_hashtbl == NULL) if (V_me_hashtbl == NULL)
return (0); return (0);
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
ip = mtod(m, const struct ip *); ip = mtod(m, const struct ip *);
CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
ip->ip_src.s_addr), chain) { ip->ip_src.s_addr), chain) {

View File

@ -381,6 +381,8 @@ struct ifnet {
*/ */
struct netdump_methods *if_netdump_methods; struct netdump_methods *if_netdump_methods;
struct epoch_context if_epoch_ctx; struct epoch_context if_epoch_ctx;
struct epoch_tracker if_addr_et;
struct epoch_tracker if_maddr_et;
/* /*
* Spare fields to be added before branching a stable branch, so * Spare fields to be added before branching a stable branch, so
@ -398,15 +400,17 @@ struct ifnet {
*/ */
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock)
#define IF_ADDR_RLOCK(if) epoch_enter_preempt(net_epoch_preempt); #define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et);
#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt); #define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et);
#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock)
#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock)
#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch() || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED)
#define NET_EPOCH_ENTER() epoch_enter_preempt(net_epoch_preempt) #define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et)
#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt) #define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et)
#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et))
/* /*
@ -482,16 +486,16 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF)
#define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_RLOCK(ifp) epoch_enter_preempt(net_epoch_preempt) #define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt) #define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock)
#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch() || mtx_owned(&(ifp)->if_afdata_lock)) #define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock))
#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch()); #define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt));
#define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED)
@ -573,16 +577,16 @@ extern struct sx ifnet_sxlock;
* write, but also whether it was acquired with sleep support or not. * write, but also whether it was acquired with sleep support or not.
*/ */
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch()) #define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define IFNET_WLOCK_ASSERT() do { \ #define IFNET_WLOCK_ASSERT() do { \
sx_assert(&ifnet_sxlock, SA_XLOCKED); \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \
rw_assert(&ifnet_rwlock, RA_WLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \
} while (0) } while (0)
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
#define IFNET_RLOCK_NOSLEEP() epoch_enter_preempt(net_epoch_preempt) #define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt) #define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et)
/* /*
* Look up an ifnet given its index; the _ref variant also acquires a * Look up an ifnet given its index; the _ref variant also acquires a

View File

@ -733,7 +733,7 @@ ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
struct ifaddr *ifa; struct ifaddr *ifa;
int not_found = 0; int not_found = 0;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
if ((flags & RTF_GATEWAY) == 0) { if ((flags & RTF_GATEWAY) == 0) {
/* /*
* If we are adding a route to an interface, * If we are adding a route to an interface,

View File

@ -1736,15 +1736,15 @@ sysctl_iflist(int af, struct walkarg *w)
struct rt_addrinfo info; struct rt_addrinfo info;
int len, error = 0; int len, error = 0;
struct sockaddr_storage ss; struct sockaddr_storage ss;
struct epoch_tracker et;
bzero((caddr_t)&info, sizeof(info)); bzero((caddr_t)&info, sizeof(info));
bzero(&ifd, sizeof(ifd)); bzero(&ifd, sizeof(ifd));
IFNET_RLOCK_NOSLEEP(); NET_EPOCH_ENTER_ET(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index) if (w->w_arg && w->w_arg != ifp->if_index)
continue; continue;
if_data_copy(ifp, &ifd); if_data_copy(ifp, &ifd);
IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr; ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr;
error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
@ -1785,15 +1785,12 @@ sysctl_iflist(int af, struct walkarg *w)
goto done; goto done;
} }
} }
IF_ADDR_RUNLOCK(ifp);
info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_IFA] = NULL;
info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_NETMASK] = NULL;
info.rti_info[RTAX_BRD] = NULL; info.rti_info[RTAX_BRD] = NULL;
} }
done: done:
if (ifp != NULL) NET_EPOCH_EXIT_ET(et);
IF_ADDR_RUNLOCK(ifp);
IFNET_RUNLOCK_NOSLEEP();
return (error); return (error);
} }

View File

@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/proc.h>
#include <net/ethernet.h> #include <net/ethernet.h>
#include <net/if.h> #include <net/if.h>
@ -224,7 +225,7 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
int len; int len;
/* prepend new IP header */ /* prepend new IP header */
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip); len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT #ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP) if (proto == IPPROTO_ETHERIP)
@ -263,7 +264,7 @@ in_gif_input(struct mbuf *m, int off, int proto, void *arg)
struct ip *ip; struct ip *ip;
uint8_t ecn; uint8_t ecn;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) { if (sc == NULL) {
m_freem(m); m_freem(m);
KMOD_IPSTAT_INC(ips_nogif); KMOD_IPSTAT_INC(ips_nogif);
@ -292,7 +293,7 @@ in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
if (V_ipv4_hashtbl == NULL) if (V_ipv4_hashtbl == NULL)
return (0); return (0);
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
ip = mtod(m, const struct ip *); ip = mtod(m, const struct ip *);
/* /*
* NOTE: it is safe to iterate without any locking here, because softc * NOTE: it is safe to iterate without any locking here, because softc

View File

@ -1084,7 +1084,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp; ifp = ia->ia_ifp;
ia = NULL; ia = NULL;
IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr; sa = ifa->ifa_addr;
@ -1098,10 +1097,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
} }
if (ia != NULL) { if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr; laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
IF_ADDR_RUNLOCK(ifp);
goto done; goto done;
} }
IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */ /* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr); error = prison_get_ip4(cred, laddr);
@ -1143,7 +1140,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
*/ */
ia = NULL; ia = NULL;
ifp = sro.ro_rt->rt_ifp; ifp = sro.ro_rt->rt_ifp;
IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr; sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET) if (sa->sa_family != AF_INET)
@ -1156,10 +1152,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
} }
if (ia != NULL) { if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr; laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
IF_ADDR_RUNLOCK(ifp);
goto done; goto done;
} }
IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */ /* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr); error = prison_get_ip4(cred, laddr);
@ -1207,9 +1201,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp; ifp = ia->ia_ifp;
ia = NULL; ia = NULL;
IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr; sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET) if (sa->sa_family != AF_INET)
continue; continue;
@ -1222,10 +1214,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
} }
if (ia != NULL) { if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr; laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
IF_ADDR_RUNLOCK(ifp);
goto done; goto done;
} }
IF_ADDR_RUNLOCK(ifp);
} }
/* 3. As a last resort return the 'default' jail address. */ /* 3. As a last resort return the 'default' jail address. */
@ -1673,6 +1663,10 @@ in_pcbdrop(struct inpcb *inp)
{ {
INP_WLOCK_ASSERT(inp); INP_WLOCK_ASSERT(inp);
#ifdef INVARIANTS
if (inp->inp_socket != NULL && inp->inp_ppcb != NULL)
MPASS(inp->inp_refcount > 1);
#endif
/* /*
* XXXRW: Possibly we should protect the setting of INP_DROPPED with * XXXRW: Possibly we should protect the setting of INP_DROPPED with
@ -2251,11 +2245,12 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
struct inpcb *inp, *tmpinp; struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg; u_short fport = fport_arg, lport = lport_arg;
#ifdef INVARIANTS
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags)); ("%s: invalid lookup flags %d", __func__, lookupflags));
if (!mtx_owned(&pcbinfo->ipi_hash_lock))
INP_HASH_LOCK_ASSERT(pcbinfo); MPASS(in_epoch_verbose(net_epoch_preempt, 1));
#endif
/* /*
* First look for an exact match. * First look for an exact match.
*/ */

View File

@ -632,16 +632,17 @@ int inp_so_options(const struct inpcb *inp);
#define INP_INFO_LOCK_INIT(ipi, d) \ #define INP_INFO_LOCK_INIT(ipi, d) \
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE) mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock) #define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
#define INP_INFO_RLOCK(ipi) NET_EPOCH_ENTER() #define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et))
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) #define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock) #define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) #define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
#define INP_INFO_RUNLOCK(ipi) NET_EPOCH_EXIT() #define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_lock)) #define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch()) #define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch() && !mtx_owned(&(ipi)->ipi_lock)) #define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \ #define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0) rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@ -664,11 +665,13 @@ int inp_so_options(const struct inpcb *inp);
#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF) #define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock) #define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
#define INP_HASH_RLOCK(ipi) NET_EPOCH_ENTER() #define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) #define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT() #define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et)
#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) #define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_hash_lock)) #define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED); #define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ #define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \

View File

@ -192,6 +192,7 @@ divert_packet(struct mbuf *m, int incoming)
u_int16_t nport; u_int16_t nport;
struct sockaddr_in divsrc; struct sockaddr_in divsrc;
struct m_tag *mtag; struct m_tag *mtag;
struct epoch_tracker et;
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag == NULL) { if (mtag == NULL) {
@ -272,7 +273,7 @@ divert_packet(struct mbuf *m, int incoming)
/* Put packet on socket queue, if any */ /* Put packet on socket queue, if any */
sa = NULL; sa = NULL;
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
INP_INFO_RLOCK(&V_divcbinfo); INP_INFO_RLOCK_ET(&V_divcbinfo, et);
CK_LIST_FOREACH(inp, &V_divcb, inp_list) { CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
/* XXX why does only one socket match? */ /* XXX why does only one socket match? */
if (inp->inp_lport == nport) { if (inp->inp_lport == nport) {
@ -290,7 +291,7 @@ divert_packet(struct mbuf *m, int incoming)
break; break;
} }
} }
INP_INFO_RUNLOCK(&V_divcbinfo); INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
if (sa == NULL) { if (sa == NULL) {
m_freem(m); m_freem(m);
KMOD_IPSTAT_INC(ips_noproto); KMOD_IPSTAT_INC(ips_noproto);
@ -634,6 +635,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
struct inpcb *inp, **inp_list; struct inpcb *inp, **inp_list;
inp_gen_t gencnt; inp_gen_t gencnt;
struct xinpgen xig; struct xinpgen xig;
struct epoch_tracker et;
/* /*
* The process of preparing the TCB list is too time-consuming and * The process of preparing the TCB list is too time-consuming and
@ -652,10 +654,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
/* /*
* OK, now we're committed to doing something. * OK, now we're committed to doing something.
*/ */
INP_INFO_RLOCK(&V_divcbinfo); INP_INFO_WLOCK(&V_divcbinfo);
gencnt = V_divcbinfo.ipi_gencnt; gencnt = V_divcbinfo.ipi_gencnt;
n = V_divcbinfo.ipi_count; n = V_divcbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_divcbinfo); INP_INFO_WUNLOCK(&V_divcbinfo);
error = sysctl_wire_old_buffer(req, error = sysctl_wire_old_buffer(req,
2 * sizeof(xig) + n*sizeof(struct xinpcb)); 2 * sizeof(xig) + n*sizeof(struct xinpcb));
@ -674,7 +676,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
if (inp_list == NULL) if (inp_list == NULL)
return ENOMEM; return ENOMEM;
INP_INFO_RLOCK(&V_divcbinfo); INP_INFO_RLOCK_ET(&V_divcbinfo, et);
for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n; for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) { inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp); INP_WLOCK(inp);
@ -685,7 +687,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
} }
INP_INFO_RUNLOCK(&V_divcbinfo); INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
n = i; n = i;
error = 0; error = 0;
@ -711,6 +713,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WUNLOCK(&V_divcbinfo); INP_INFO_WUNLOCK(&V_divcbinfo);
if (!error) { if (!error) {
struct epoch_tracker et;
/* /*
* Give the user an updated idea of our state. * Give the user an updated idea of our state.
* If the generation differs from what we told * If the generation differs from what we told
@ -718,11 +721,11 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it * while we were processing this request, and it
* might be necessary to retry. * might be necessary to retry.
*/ */
INP_INFO_RLOCK(&V_divcbinfo); INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt; xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count; xig.xig_count = V_divcbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_divcbinfo); INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig); error = SYSCTL_OUT(req, &xig, sizeof xig);
} }
free(inp_list, M_TEMP); free(inp_list, M_TEMP);

View File

@ -112,8 +112,8 @@ static struct mtx encapmtx;
MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
#define ENCAP_WLOCK() mtx_lock(&encapmtx) #define ENCAP_WLOCK() mtx_lock(&encapmtx)
#define ENCAP_WUNLOCK() mtx_unlock(&encapmtx) #define ENCAP_WUNLOCK() mtx_unlock(&encapmtx)
#define ENCAP_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define ENCAP_RLOCK() struct epoch_tracker encap_et; epoch_enter_preempt(net_epoch_preempt, &encap_et)
#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &encap_et)
#define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt) #define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt)
static struct encaptab * static struct encaptab *

View File

@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/proc.h>
#include <net/if.h> #include <net/if.h>
#include <net/if_var.h> #include <net/if_var.h>
@ -118,7 +119,7 @@ in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg)
if (V_ipv4_hashtbl == NULL) if (V_ipv4_hashtbl == NULL)
return (0); return (0);
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
ip = mtod(m, const struct ip *); ip = mtod(m, const struct ip *);
CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr, CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr,
ip->ip_src.s_addr), chain) { ip->ip_src.s_addr), chain) {

View File

@ -285,6 +285,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
struct ip *ip = mtod(m, struct ip *); struct ip *ip = mtod(m, struct ip *);
struct inpcb *inp, *last; struct inpcb *inp, *last;
struct sockaddr_in ripsrc; struct sockaddr_in ripsrc;
struct epoch_tracker et;
int hash; int hash;
*mp = NULL; *mp = NULL;
@ -299,7 +300,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
if (inp->inp_ip_p != proto) if (inp->inp_ip_p != proto)
continue; continue;
@ -422,7 +423,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
skip_2: skip_2:
INP_RUNLOCK(inp); INP_RUNLOCK(inp);
} }
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) { if (last != NULL) {
if (rip_append(last, ip, m, &ripsrc) != 0) if (rip_append(last, ip, m, &ripsrc) != 0)
IPSTAT_INC(ips_delivered); IPSTAT_INC(ips_delivered);
@ -1035,6 +1036,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
struct inpcb *inp, **inp_list; struct inpcb *inp, **inp_list;
inp_gen_t gencnt; inp_gen_t gencnt;
struct xinpgen xig; struct xinpgen xig;
struct epoch_tracker et;
/* /*
* The process of preparing the TCB list is too time-consuming and * The process of preparing the TCB list is too time-consuming and
@ -1053,10 +1055,10 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
/* /*
* OK, now we're committed to doing something. * OK, now we're committed to doing something.
*/ */
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_WLOCK(&V_ripcbinfo);
gencnt = V_ripcbinfo.ipi_gencnt; gencnt = V_ripcbinfo.ipi_gencnt;
n = V_ripcbinfo.ipi_count; n = V_ripcbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_WUNLOCK(&V_ripcbinfo);
xig.xig_len = sizeof xig; xig.xig_len = sizeof xig;
xig.xig_count = n; xig.xig_count = n;
@ -1070,7 +1072,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
if (inp_list == NULL) if (inp_list == NULL)
return (ENOMEM); return (ENOMEM);
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) { inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp); INP_WLOCK(inp);
@ -1081,7 +1083,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
} }
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
n = i; n = i;
error = 0; error = 0;
@ -1107,17 +1109,18 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WUNLOCK(&V_ripcbinfo); INP_INFO_WUNLOCK(&V_ripcbinfo);
if (!error) { if (!error) {
struct epoch_tracker et;
/* /*
* Give the user an updated idea of our state. If the * Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows * generation differs from what we told her before, she knows
* that something happened while we were processing this * that something happened while we were processing this
* request, and it might be necessary to retry. * request, and it might be necessary to retry.
*/ */
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt; xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count; xig.xig_count = V_ripcbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig); error = SYSCTL_OUT(req, &xig, sizeof xig);
} }
free(inp_list, M_TEMP); free(inp_list, M_TEMP);

View File

@ -998,7 +998,7 @@ __tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int3
void void
tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked) int32_t tlen, int32_t drop_hdrlen, uint8_t iptos)
{ {
/* Setup packet for input first */ /* Setup packet for input first */
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
@ -1006,7 +1006,7 @@ tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
m->m_pkthdr.pace_tlen = (uint16_t) tlen; m->m_pkthdr.pace_tlen = (uint16_t) tlen;
m->m_pkthdr.pace_drphdrlen = drop_hdrlen; m->m_pkthdr.pace_drphdrlen = drop_hdrlen;
m->m_pkthdr.pace_tos = iptos; m->m_pkthdr.pace_tos = iptos;
m->m_pkthdr.pace_lock = (uint8_t) ti_locked; m->m_pkthdr.pace_lock = (curthread->td_epochnest != 0);
if (tp->t_in_pkt == NULL) { if (tp->t_in_pkt == NULL) {
tp->t_in_pkt = m; tp->t_in_pkt = m;
tp->t_tail_pkt = m; tp->t_tail_pkt = m;
@ -1019,11 +1019,11 @@ tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
int32_t int32_t
__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, __tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line){ int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line){
struct tcp_hpts_entry *hpts; struct tcp_hpts_entry *hpts;
int32_t ret; int32_t ret;
tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos, ti_locked); tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos);
hpts = tcp_input_lock(tp->t_inpcb); hpts = tcp_input_lock(tp->t_inpcb);
ret = __tcp_queue_to_input_locked(tp->t_inpcb, hpts, line); ret = __tcp_queue_to_input_locked(tp->t_inpcb, hpts, line);
mtx_unlock(&hpts->p_mtx); mtx_unlock(&hpts->p_mtx);
@ -1145,6 +1145,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
int16_t set_cpu; int16_t set_cpu;
uint32_t did_prefetch = 0; uint32_t did_prefetch = 0;
int32_t ti_locked = TI_UNLOCKED; int32_t ti_locked = TI_UNLOCKED;
struct epoch_tracker et;
HPTS_MTX_ASSERT(hpts); HPTS_MTX_ASSERT(hpts);
while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) { while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
@ -1161,7 +1162,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
mtx_unlock(&hpts->p_mtx); mtx_unlock(&hpts->p_mtx);
CURVNET_SET(inp->inp_vnet); CURVNET_SET(inp->inp_vnet);
if (drop_reason) { if (drop_reason) {
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
} else { } else {
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
@ -1172,7 +1173,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
out: out:
hpts->p_inp = NULL; hpts->p_inp = NULL;
if (ti_locked == TI_RLOCKED) { if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
if (in_pcbrele_wlocked(inp) == 0) { if (in_pcbrele_wlocked(inp) == 0) {
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
@ -1201,7 +1202,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
n = m->m_nextpkt; n = m->m_nextpkt;
} }
tp = tcp_drop(tp, drop_reason); tp = tcp_drop(tp, drop_reason);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (tp == NULL) { if (tp == NULL) {
INP_WLOCK(inp); INP_WLOCK(inp);
} }
@ -1234,7 +1235,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
(m->m_pkthdr.pace_lock == TI_RLOCKED || (m->m_pkthdr.pace_lock == TI_RLOCKED ||
tp->t_state != TCPS_ESTABLISHED)) { tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
m = tp->t_in_pkt; m = tp->t_in_pkt;
} }
if (in_newts_every_tcb) { if (in_newts_every_tcb) {
@ -1270,13 +1271,15 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
/* Use the hpts specific do_segment */ /* Use the hpts specific do_segment */
(*tp->t_fb->tfb_tcp_hpts_do_segment) (m, th, inp->inp_socket, (*tp->t_fb->tfb_tcp_hpts_do_segment) (m, th, inp->inp_socket,
tp, drop_hdrlen, tp, drop_hdrlen,
tlen, iptos, ti_locked, nxt_pkt, tv); tlen, iptos, nxt_pkt, tv);
} else { } else {
/* Use the default do_segment */ /* Use the default do_segment */
(*tp->t_fb->tfb_tcp_do_segment) (m, th, inp->inp_socket, (*tp->t_fb->tfb_tcp_do_segment) (m, th, inp->inp_socket,
tp, drop_hdrlen, tp, drop_hdrlen,
tlen, iptos, ti_locked); tlen, iptos);
} }
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
/* /*
* Do segment returns unlocked we need the * Do segment returns unlocked we need the
* lock again but we also need some kasserts * lock again but we also need some kasserts
@ -1289,7 +1292,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
n = m->m_nextpkt; n = m->m_nextpkt;
if (m != NULL && if (m != NULL &&
m->m_pkthdr.pace_lock == TI_RLOCKED) { m->m_pkthdr.pace_lock == TI_RLOCKED) {
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
} else } else
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
@ -1316,14 +1319,14 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
if (ti_locked == TI_UNLOCKED && if (ti_locked == TI_UNLOCKED &&
(tp->t_state != TCPS_ESTABLISHED)) { (tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
} }
} /** end while(m) */ } /** end while(m) */
} /** end if ((m != NULL) && (m == tp->t_in_pkt)) */ } /** end if ((m != NULL) && (m == tp->t_in_pkt)) */
if (in_pcbrele_wlocked(inp) == 0) if (in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
if (ti_locked == TI_RLOCKED) if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_UNLOCK_ASSERT(inp); INP_UNLOCK_ASSERT(inp);
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;

View File

@ -238,10 +238,10 @@ int
/*
 * Input-queue entry points and the wrapper macros that append the caller's
 * __LINE__ to the double-underscore functions.
 *
 * NOTE(review): tcp_queue_to_input_locked() expands with a trailing ';',
 * so it is only usable as a full statement.  Left unchanged because its
 * callers are not visible here.
 */
#define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__); #define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
void void
tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked); int32_t tlen, int32_t drop_hdrlen, uint8_t iptos);
int int
__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, __tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line); int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line);
/*
 * The ti_locked argument was dropped from __tcp_queue_to_input(), which now
 * takes six arguments plus the line number.  The wrapper has to forward
 * exactly those six; the previous seven-parameter form passed eight
 * arguments to a seven-parameter function and could not compile when used.
 */
#define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__) #define tcp_queue_to_input(a, b, c, d, e, f) __tcp_queue_to_input(a, b, c, d, e, f, __LINE__)
uint16_t tcp_hpts_delayedby(struct inpcb *inp); uint16_t tcp_hpts_delayedby(struct inpcb *inp);

View File

@ -583,6 +583,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int rstreason = 0; /* For badport_bandlim accounting purposes */ int rstreason = 0; /* For badport_bandlim accounting purposes */
uint8_t iptos; uint8_t iptos;
struct m_tag *fwd_tag = NULL; struct m_tag *fwd_tag = NULL;
struct epoch_tracker et;
#ifdef INET6 #ifdef INET6
struct ip6_hdr *ip6 = NULL; struct ip6_hdr *ip6 = NULL;
int isipv6; int isipv6;
@ -773,7 +774,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* connection in TIMEWAIT and SYNs not targeting a listening socket. * connection in TIMEWAIT and SYNs not targeting a listening socket.
*/ */
if ((thflags & (TH_FIN | TH_RST)) != 0) { if ((thflags & (TH_FIN | TH_RST)) != 0) {
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
} else } else
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
@ -962,7 +963,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
*/ */
if (inp->inp_flags & INP_TIMEWAIT) { if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) { if (ti_locked == TI_UNLOCKED) {
INP_INFO_RLOCK(); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
} }
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@ -974,7 +975,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
*/ */
if (tcp_twcheck(inp, &to, th, m, tlen)) if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb; goto findpcb;
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE); return (IPPROTO_DONE);
} }
/* /*
@ -1011,7 +1012,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) { !IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) { if (ti_locked == TI_UNLOCKED) {
INP_INFO_RLOCK(); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED; ti_locked = TI_RLOCKED;
} }
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@ -1145,8 +1146,9 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* the mbuf chain and unlocks the inpcb. * the mbuf chain and unlocks the inpcb.
*/ */
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos, ti_locked); iptos);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE); return (IPPROTO_DONE);
} }
/* /*
@ -1350,7 +1352,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* Only the listen socket is unlocked by syncache_add(). * Only the listen socket is unlocked by syncache_add().
*/ */
if (ti_locked == TI_RLOCKED) { if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
} }
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@ -1384,15 +1386,16 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* state. tcp_do_segment() always consumes the mbuf chain, unlocks * state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo. * the inpcb, and unlocks pcbinfo.
*/ */
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE); return (IPPROTO_DONE);
dropwithreset: dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th); TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) { if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
} }
#ifdef INVARIANTS #ifdef INVARIANTS
@ -1416,7 +1419,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
TCP_PROBE5(receive, NULL, tp, m, tp, th); TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) { if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED; ti_locked = TI_UNLOCKED;
} }
#ifdef INVARIANTS #ifdef INVARIANTS
@ -1503,8 +1506,7 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
void void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
int ti_locked)
{ {
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win; int rstreason, todrop, win;
@ -1530,7 +1532,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->sackhint.last_sack_ack = 0; tp->sackhint.last_sack_ack = 0;
sack_changed = 0; sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs); nsegs = max(1, m->m_pkthdr.lro_nsegs);
/* /*
* If this is either a state-changing packet or current state isn't * If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we * established, we require a write lock on tcbinfo. Otherwise, we
@ -1539,19 +1540,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) { tp->t_state != TCPS_ESTABLISHED) {
KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
if (ti_locked == TI_RLOCKED)
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
}
#endif
} }
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
@ -1760,10 +1749,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/* /*
* This is a pure ack for outstanding data. * This is a pure ack for outstanding data.
*/ */
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack); TCPSTAT_INC(tcps_predack);
/* /*
@ -1867,10 +1852,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough * nothing on the reassembly queue and we have enough
* buffer space to take it. * buffer space to take it.
*/ */
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */ /* Clean receiver SACK report if present */
if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp); tcp_clean_sackreport(tp);
@ -2072,8 +2053,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED); tcp_state_change(tp, TCPS_SYN_RECEIVED);
} }
KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
@ -2148,9 +2127,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(ti_locked == TI_RLOCKED,
("%s: TH_RST ti_locked %d, th %p tp %p",
__func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT, KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp)); __func__, th, tp));
@ -2193,8 +2169,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) { tp->t_state != TCPS_SYN_RECEIVED) {
KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn); TCPSTAT_INC(tcps_badsyn);
@ -2308,8 +2282,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((so->so_state & SS_NOFDREF) && if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) { tp->t_state > TCPS_CLOSE_WAIT && tlen) {
KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
@ -2899,7 +2871,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (ourfinisacked) { if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp); tcp_twstart(tp);
INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m); m_freem(m);
return; return;
} }
@ -3131,19 +3102,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
case TCPS_FIN_WAIT_2: case TCPS_FIN_WAIT_2:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp); tcp_twstart(tp);
INP_INFO_RUNLOCK(&V_tcbinfo);
return; return;
} }
} }
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG #ifdef TCPDEBUG
if (so->so_options & SO_DEBUG) if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
@ -3158,9 +3121,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(void) tp->t_fb->tfb_tcp_output(tp); (void) tp->t_fb->tfb_tcp_output(tp);
check_delack: check_delack:
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
__func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) { if (tp->t_flags & TF_DELACK) {
@ -3198,10 +3158,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
&tcp_savetcp, 0); &tcp_savetcp, 0);
#endif #endif
TCP_PROBE3(debug__input, tp, th, m); TCP_PROBE3(debug__input, tp, th, m);
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW; tp->t_flags |= TF_ACKNOW;
(void) tp->t_fb->tfb_tcp_output(tp); (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
@ -3209,10 +3165,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
return; return;
dropwithreset: dropwithreset:
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
if (tp != NULL) { if (tp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason); tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
@ -3221,15 +3173,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
return; return;
drop: drop:
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
else
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
#endif
/* /*
* Drop space held by incoming segment and return. * Drop space held by incoming segment and return.
*/ */

View File

@ -121,12 +121,10 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h> #include <security/mac/mac_framework.h>
/*
 * Forward declarations of the two segment-processing entry points.
 * The updated (right-hand) prototypes end at the uint8_t argument; the
 * trailing int (the former ti_locked argument) is no longer part of either
 * signature.
 */
static void tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *, static void tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t, struct socket *, struct tcpcb *, int, int, uint8_t);
int);
static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *, static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t, struct socket *, struct tcpcb *, int, int, uint8_t);
int);
/* /*
* Indicate whether this ack should be delayed. We can delay the ack if * Indicate whether this ack should be delayed. We can delay the ack if
@ -154,7 +152,7 @@ static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *,
static void static void
tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
int ti_locked, uint32_t tiwin) uint32_t tiwin)
{ {
int acked; int acked;
uint16_t nsegs; uint16_t nsegs;
@ -170,6 +168,7 @@ tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcphdr tcp_savetcp; struct tcphdr tcp_savetcp;
short ostate = 0; short ostate = 0;
#endif #endif
/* /*
* The following if statement will be true if * The following if statement will be true if
* we are doing the win_up_in_fp <and> * we are doing the win_up_in_fp <and>
@ -207,11 +206,6 @@ tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
/* /*
* This is a pure ack for outstanding data. * This is a pure ack for outstanding data.
*/ */
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack); TCPSTAT_INC(tcps_predack);
/* /*
@ -310,9 +304,6 @@ tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
sowwakeup(so); sowwakeup(so);
if (sbavail(&so->so_snd)) if (sbavail(&so->so_snd))
(void) tcp_output(tp); (void) tcp_output(tp);
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
__func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) { if (tp->t_flags & TF_DELACK) {
@ -330,7 +321,7 @@ tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
static void static void
tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
int ti_locked, uint32_t tiwin) uint32_t tiwin)
{ {
int newsize = 0; /* automatic sockbuf scaling */ int newsize = 0; /* automatic sockbuf scaling */
#ifdef TCPDEBUG #ifdef TCPDEBUG
@ -354,16 +345,6 @@ tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->ts_recent = to->to_tsval; tp->ts_recent = to->to_tsval;
} }
/*
* This is a pure, in-sequence data packet with
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */ /* Clean receiver SACK report if present */
if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp); tcp_clean_sackreport(tp);
@ -413,9 +394,6 @@ tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->t_flags |= TF_ACKNOW; tp->t_flags |= TF_ACKNOW;
tcp_output(tp); tcp_output(tp);
} }
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
__func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) { if (tp->t_flags & TF_DELACK) {
@ -434,7 +412,7 @@ tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
static void static void
tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
int ti_locked, uint32_t tiwin, int thflags) uint32_t tiwin, int thflags)
{ {
int acked, ourfinisacked, needoutput = 0; int acked, ourfinisacked, needoutput = 0;
int rstreason, todrop, win; int rstreason, todrop, win;
@ -464,7 +442,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (win < 0) if (win < 0)
win = 0; win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
switch (tp->t_state) { switch (tp->t_state) {
/* /*
@ -569,8 +546,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED); tcp_state_change(tp, TCPS_SYN_RECEIVED);
} }
KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
@ -644,9 +619,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(ti_locked == TI_RLOCKED,
("%s: TH_RST ti_locked %d, th %p tp %p",
__func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT, KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp)); __func__, th, tp));
@ -688,8 +660,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
* Send challenge ACK for any SYN in synchronized state. * Send challenge ACK for any SYN in synchronized state.
*/ */
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) { if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn); TCPSTAT_INC(tcps_badsyn);
@ -803,8 +773,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((so->so_state & SS_NOFDREF) && if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) { tp->t_state > TCPS_CLOSE_WAIT && tlen) {
KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
@ -1333,7 +1301,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (ourfinisacked) { if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp); tcp_twstart(tp);
INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m); m_freem(m);
return; return;
} }
@ -1562,20 +1529,10 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
case TCPS_FIN_WAIT_2: case TCPS_FIN_WAIT_2:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp); tcp_twstart(tp);
INP_INFO_RUNLOCK(&V_tcbinfo);
return; return;
} }
} }
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG #ifdef TCPDEBUG
if (so->so_options & SO_DEBUG) if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
@ -1589,9 +1546,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (needoutput || (tp->t_flags & TF_ACKNOW)) if (needoutput || (tp->t_flags & TF_ACKNOW))
(void) tp->t_fb->tfb_tcp_output(tp); (void) tp->t_fb->tfb_tcp_output(tp);
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
__func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) { if (tp->t_flags & TF_DELACK) {
@ -1629,11 +1583,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
&tcp_savetcp, 0); &tcp_savetcp, 0);
#endif #endif
TCP_PROBE3(debug__drop, tp, th, m); TCP_PROBE3(debug__drop, tp, th, m);
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW; tp->t_flags |= TF_ACKNOW;
(void) tp->t_fb->tfb_tcp_output(tp); (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
@ -1641,11 +1590,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
return; return;
dropwithreset: dropwithreset:
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
if (tp != NULL) { if (tp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason); tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
@ -1654,15 +1598,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
return; return;
drop: drop:
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
else
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
#endif
/* /*
* Drop space held by incoming segment and return. * Drop space held by incoming segment and return.
*/ */
@ -1687,8 +1622,7 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
void void
tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
int ti_locked)
{ {
int thflags; int thflags;
uint32_t tiwin; uint32_t tiwin;
@ -1709,19 +1643,7 @@ tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) { tp->t_state != TCPS_ESTABLISHED) {
KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
if (ti_locked == TI_RLOCKED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
}
#endif
} }
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
@ -1736,9 +1658,6 @@ tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
"sysctl setting)\n", s, __func__); "sysctl setting)\n", s, __func__);
free(s, M_TCPLOG); free(s, M_TCPLOG);
} }
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
m_freem(m); m_freem(m);
return; return;
@ -1751,9 +1670,6 @@ tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED); tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED);
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
return; return;
} }
@ -1919,19 +1835,19 @@ tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
TAILQ_EMPTY(&tp->snd_holes)))) { TAILQ_EMPTY(&tp->snd_holes)))) {
/* We are done */ /* We are done */
tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen,
ti_locked, tiwin); tiwin);
return; return;
} else if ((tlen) && } else if ((tlen) &&
(th->th_ack == tp->snd_una && (th->th_ack == tp->snd_una &&
tlen <= sbspace(&so->so_rcv))) { tlen <= sbspace(&so->so_rcv))) {
tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen, tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen,
ti_locked, tiwin); tiwin);
/* We are done */ /* We are done */
return; return;
} }
} }
tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
ti_locked, tiwin, thflags); tiwin, thflags);
} }
@ -1947,7 +1863,7 @@ tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int static int
tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
int ti_locked, uint32_t tiwin) uint32_t tiwin)
{ {
int acked; int acked;
uint16_t nsegs; uint16_t nsegs;
@ -2039,11 +1955,6 @@ tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
/* /*
* This is a pure ack for outstanding data. * This is a pure ack for outstanding data.
*/ */
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack); TCPSTAT_INC(tcps_predack);
/* /*
@ -2138,9 +2049,6 @@ tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
} }
if (sbavail(&so->so_snd)) if (sbavail(&so->so_snd))
(void) tcp_output(tp); (void) tcp_output(tp);
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
__func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) { if (tp->t_flags & TF_DELACK) {
@ -2167,8 +2075,7 @@ tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
void void
tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
int ti_locked)
{ {
int thflags; int thflags;
uint32_t tiwin; uint32_t tiwin;
@ -2186,19 +2093,7 @@ tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/ */
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) { tp->t_state != TCPS_ESTABLISHED) {
KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
if (ti_locked == TI_RLOCKED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
}
#endif
} }
INP_WLOCK_ASSERT(tp->t_inpcb); INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
@ -2213,9 +2108,6 @@ tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
"sysctl setting)\n", s, __func__); "sysctl setting)\n", s, __func__);
free(s, M_TCPLOG); free(s, M_TCPLOG);
} }
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
m_freem(m); m_freem(m);
return; return;
@ -2228,9 +2120,6 @@ tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED); tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED);
if (ti_locked == TI_RLOCKED) {
INP_INFO_RUNLOCK(&V_tcbinfo);
}
INP_WUNLOCK(tp->t_inpcb); INP_WUNLOCK(tp->t_inpcb);
return; return;
} }
@ -2367,12 +2256,12 @@ tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
__predict_true(LIST_EMPTY(&tp->t_segq)) && __predict_true(LIST_EMPTY(&tp->t_segq)) &&
__predict_true(th->th_seq == tp->rcv_nxt)) { __predict_true(th->th_seq == tp->rcv_nxt)) {
if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen,
ti_locked, tiwin)) { tiwin)) {
return; return;
} }
} }
tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
ti_locked, tiwin, thflags); tiwin, thflags);
} }
struct tcp_function_block __tcp_fastslow = { struct tcp_function_block __tcp_fastslow = {

File diff suppressed because it is too large Load Diff

View File

@ -281,7 +281,7 @@ struct tcp_rack {
TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */
int32_t(*r_substate) (struct mbuf *, struct tcphdr *, int32_t(*r_substate) (struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, struct tcpopt *, struct socket *, struct tcpcb *, struct tcpopt *,
int32_t, int32_t, int32_t *, uint32_t, int, int); /* Lock(a) */ int32_t, int32_t, uint32_t, int, int); /* Lock(a) */
struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ struct tcpcb *rc_tp; /* The tcpcb Lock(a) */
struct inpcb *rc_inp; /* The inpcb Lock(a) */ struct inpcb *rc_inp; /* The inpcb Lock(a) */
uint32_t rc_free_cnt; /* Number of free entries on the rc_free list uint32_t rc_free_cnt; /* Number of free entries on the rc_free list

View File

@ -1914,10 +1914,11 @@ tcp_timer_discard(void *ptp)
{ {
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp; struct tcpcb *tp;
struct epoch_tracker et;
tp = (struct tcpcb *)ptp; tp = (struct tcpcb *)ptp;
CURVNET_SET(tp->t_vnet); CURVNET_SET(tp->t_vnet);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = tp->t_inpcb; inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp)); __func__, tp));
@ -1937,13 +1938,13 @@ tcp_timer_discard(void *ptp)
tp->t_inpcb = NULL; tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp); uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) { if (in_pcbrele_wlocked(inp)) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
return; return;
} }
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE(); CURVNET_RESTORE();
} }
@ -2107,6 +2108,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
struct inpcb *inp, **inp_list; struct inpcb *inp, **inp_list;
inp_gen_t gencnt; inp_gen_t gencnt;
struct xinpgen xig; struct xinpgen xig;
struct epoch_tracker et;
/* /*
* The process of preparing the TCB list is too time-consuming and * The process of preparing the TCB list is too time-consuming and
@ -2193,14 +2195,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else } else
INP_RUNLOCK(inp); INP_RUNLOCK(inp);
} }
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
inp = inp_list[i]; inp = inp_list[i];
INP_RLOCK(inp); INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp)) if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp); INP_RUNLOCK(inp);
} }
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (!error) { if (!error) {
/* /*
@ -2339,6 +2341,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp; struct icmp *icp;
struct in_conninfo inc; struct in_conninfo inc;
struct epoch_tracker et;
tcp_seq icmp_tcp_seq; tcp_seq icmp_tcp_seq;
int mtu; int mtu;
@ -2370,7 +2373,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
th->th_sport, INPLOOKUP_WLOCKPCB, NULL); th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) { if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@ -2435,7 +2438,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
out: out:
if (inp != NULL) if (inp != NULL)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
#endif /* INET */ #endif /* INET */
@ -2453,6 +2456,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
struct ip6ctlparam *ip6cp = NULL; struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL; const struct sockaddr_in6 *sa6_src = NULL;
struct in_conninfo inc; struct in_conninfo inc;
struct epoch_tracker et;
struct tcp_ports { struct tcp_ports {
uint16_t th_sport; uint16_t th_sport;
uint16_t th_dport; uint16_t th_dport;
@ -2514,7 +2518,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
} }
bzero(&t_ports, sizeof(struct tcp_ports)); bzero(&t_ports, sizeof(struct tcp_ports));
m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports); m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport, inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
&ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL); &ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) { if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@ -2586,7 +2590,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
out: out:
if (inp != NULL) if (inp != NULL)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
#endif /* INET6 */ #endif /* INET6 */
@ -2925,6 +2929,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
struct tcpcb *tp; struct tcpcb *tp;
struct tcptw *tw; struct tcptw *tw;
struct sockaddr_in *fin, *lin; struct sockaddr_in *fin, *lin;
struct epoch_tracker et;
#ifdef INET6 #ifdef INET6
struct sockaddr_in6 *fin6, *lin6; struct sockaddr_in6 *fin6, *lin6;
#endif #endif
@ -2984,7 +2989,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
default: default:
return (EINVAL); return (EINVAL);
} }
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
switch (addrs[0].ss_family) { switch (addrs[0].ss_family) {
#ifdef INET6 #ifdef INET6
case AF_INET6: case AF_INET6:
@ -3023,7 +3028,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
} else } else
error = ESRCH; error = ESRCH;
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error); return (error);
} }

View File

@ -274,55 +274,10 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE(); CURVNET_RESTORE();
} }
/*
* When a timer wants to remove a TCB it must
* hold the INP_INFO_RLOCK(). The timer function
* should only have grabbed the INP_WLOCK() when
* it entered. To safely switch to holding both the
* INP_INFO_RLOCK() and the INP_WLOCK() we must first
* grab a reference on the inp, which will hold the inp
* so that it can't be removed. We then unlock the INP_WLOCK(),
* and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
* we proceed again to get the INP_WLOCK() (this preserves proper
* lock order). After acquiring the INP_WLOCK we must check if someone
* else deleted the pcb i.e. the inp_flags check.
* If so we return 1 otherwise we return 0.
*
* No matter what the tcp_inpinfo_lock_add() function
* returns the caller must afterwards call tcp_inpinfo_lock_del()
* to drop the locks and reference properly.
*/
int
tcp_inpinfo_lock_add(struct inpcb *inp)
{
in_pcbref(inp);
INP_WUNLOCK(inp);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
return(1);
}
return(0);
}
void void
tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
{ {
INP_INFO_RUNLOCK(&V_tcbinfo); if (inp && tp != NULL)
if (inp && (tp == NULL)) {
/*
* If tcp_close/drop() gets called and tp
* returns NULL, then the function dropped
* the inp lock, we hold a reference keeping
* this around, so we must re-aquire the
* INP_WLOCK() in order to proceed with
* our dropping the inp reference.
*/
INP_WLOCK(inp);
}
if (inp && in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
} }
@ -331,6 +286,7 @@ tcp_timer_2msl(void *xtp)
{ {
struct tcpcb *tp = xtp; struct tcpcb *tp = xtp;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
CURVNET_SET(tp->t_vnet); CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG #ifdef TCPDEBUG
int ostate; int ostate;
@ -377,11 +333,13 @@ tcp_timer_2msl(void *xtp)
tp->t_inpcb && tp->t_inpcb->inp_socket && tp->t_inpcb && tp->t_inpcb->inp_socket &&
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops); TCPSTAT_INC(tcps_finwait2_drops);
if (tcp_inpinfo_lock_add(inp)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp); tp = tcp_close(tp);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} else { } else {
@ -389,15 +347,17 @@ tcp_timer_2msl(void *xtp)
callout_reset(&tp->t_timers->tt_2msl, callout_reset(&tp->t_timers->tt_2msl,
TP_KEEPINTVL(tp), tcp_timer_2msl, tp); TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
} else { } else {
if (tcp_inpinfo_lock_add(inp)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp); tp = tcp_close(tp);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
} }
#ifdef TCPDEBUG #ifdef TCPDEBUG
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
@ -418,6 +378,7 @@ tcp_timer_keep(void *xtp)
struct tcpcb *tp = xtp; struct tcpcb *tp = xtp;
struct tcptemp *t_template; struct tcptemp *t_template;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
CURVNET_SET(tp->t_vnet); CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG #ifdef TCPDEBUG
int ostate; int ostate;
@ -511,11 +472,11 @@ tcp_timer_keep(void *xtp)
dropit: dropit:
TCPSTAT_INC(tcps_keepdrops); TCPSTAT_INC(tcps_keepdrops);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
if (tcp_inpinfo_lock_add(inp)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT); tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG #ifdef TCPDEBUG
@ -524,8 +485,9 @@ tcp_timer_keep(void *xtp)
PRU_SLOWTIMO); PRU_SLOWTIMO);
#endif #endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
out: out:
CURVNET_RESTORE(); CURVNET_RESTORE();
} }
@ -534,6 +496,7 @@ tcp_timer_persist(void *xtp)
{ {
struct tcpcb *tp = xtp; struct tcpcb *tp = xtp;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
CURVNET_SET(tp->t_vnet); CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG #ifdef TCPDEBUG
int ostate; int ostate;
@ -573,11 +536,13 @@ tcp_timer_persist(void *xtp)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle || (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
TCPSTAT_INC(tcps_persistdrop); TCPSTAT_INC(tcps_persistdrop);
if (tcp_inpinfo_lock_add(inp)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT); tp = tcp_drop(tp, ETIMEDOUT);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
@ -588,11 +553,13 @@ tcp_timer_persist(void *xtp)
if (tp->t_state > TCPS_CLOSE_WAIT && if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop); TCPSTAT_INC(tcps_persistdrop);
if (tcp_inpinfo_lock_add(inp)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT); tp = tcp_drop(tp, ETIMEDOUT);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
@ -618,6 +585,7 @@ tcp_timer_rexmt(void * xtp)
CURVNET_SET(tp->t_vnet); CURVNET_SET(tp->t_vnet);
int rexmt; int rexmt;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
#ifdef TCPDEBUG #ifdef TCPDEBUG
int ostate; int ostate;
@ -654,11 +622,13 @@ tcp_timer_rexmt(void * xtp)
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT; tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop); TCPSTAT_INC(tcps_timeoutdrop);
if (tcp_inpinfo_lock_add(inp)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT); tp = tcp_drop(tp, ETIMEDOUT);
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp); tcp_inpinfo_lock_del(inp, tp);
goto out; goto out;
} }

View File

@ -214,7 +214,6 @@ VNET_DECLARE(int, tcp_pmtud_blackhole_mss);
VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss); VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss);
#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) #define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
int tcp_inpinfo_lock_add(struct inpcb *inp);
void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp); void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp);
void tcp_timer_init(void); void tcp_timer_init(void);

View File

@ -206,11 +206,12 @@ void
tcp_tw_destroy(void) tcp_tw_destroy(void)
{ {
struct tcptw *tw; struct tcptw *tw;
struct epoch_tracker et;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL) while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0); tcp_twclose(tw, 0);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TW_LOCK_DESTROY(V_tw_lock); TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone); uma_zdestroy(V_tcptw_zone);
@ -674,6 +675,7 @@ tcp_tw_2msl_scan(int reuse)
{ {
struct tcptw *tw; struct tcptw *tw;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
#ifdef INVARIANTS #ifdef INVARIANTS
if (reuse) { if (reuse) {
@ -707,12 +709,12 @@ tcp_tw_2msl_scan(int reuse)
in_pcbref(inp); in_pcbref(inp);
TW_RUNLOCK(V_tw_lock); TW_RUNLOCK(V_tw_lock);
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
tw = intotw(inp); tw = intotw(inp);
if (in_pcbrele_wlocked(inp)) { if (in_pcbrele_wlocked(inp)) {
if (__predict_true(tw == NULL)) { if (__predict_true(tw == NULL)) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue; continue;
} else { } else {
/* This should not happen as in TIMEWAIT /* This should not happen as in TIMEWAIT
@ -731,7 +733,7 @@ tcp_tw_2msl_scan(int reuse)
"|| inp last reference) && tw != " "|| inp last reference) && tw != "
"NULL", __func__); "NULL", __func__);
#endif #endif
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
break; break;
} }
} }
@ -739,12 +741,12 @@ tcp_tw_2msl_scan(int reuse)
if (tw == NULL) { if (tw == NULL) {
/* tcp_twclose() has already been called */ /* tcp_twclose() has already been called */
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue; continue;
} }
tcp_twclose(tw, reuse); tcp_twclose(tw, reuse);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (reuse) if (reuse)
return tw; return tw;
} }

View File

@ -276,11 +276,12 @@ tcp_usr_detach(struct socket *so)
{ {
struct inpcb *inp; struct inpcb *inp;
int rlock = 0; int rlock = 0;
struct epoch_tracker et;
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
if (!INP_INFO_WLOCKED(&V_tcbinfo)) { if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
rlock = 1; rlock = 1;
} }
INP_WLOCK(inp); INP_WLOCK(inp);
@ -288,7 +289,7 @@ tcp_usr_detach(struct socket *so)
("tcp_usr_detach: inp_socket == NULL")); ("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp); tcp_detach(so, inp);
if (rlock) if (rlock)
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
#ifdef INET #ifdef INET
@ -668,10 +669,11 @@ tcp_usr_disconnect(struct socket *so)
{ {
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct epoch_tracker et;
int error = 0; int error = 0;
TCPDEBUG0; TCPDEBUG0;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp); INP_WLOCK(inp);
@ -688,7 +690,7 @@ tcp_usr_disconnect(struct socket *so)
TCPDEBUG2(PRU_DISCONNECT); TCPDEBUG2(PRU_DISCONNECT);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error); return (error);
} }
@ -747,6 +749,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct in_addr addr; struct in_addr addr;
struct in6_addr addr6; struct in6_addr addr6;
struct epoch_tracker et;
in_port_t port = 0; in_port_t port = 0;
int v4 = 0; int v4 = 0;
TCPDEBUG0; TCPDEBUG0;
@ -756,7 +759,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED; error = ECONNABORTED;
@ -783,7 +786,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
TCPDEBUG2(PRU_ACCEPT); TCPDEBUG2(PRU_ACCEPT);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT); TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (error == 0) { if (error == 0) {
if (v4) if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr); *nam = in6_v4mapsin6_sockaddr(port, &addr);
@ -803,9 +806,10 @@ tcp_usr_shutdown(struct socket *so)
int error = 0; int error = 0;
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct epoch_tracker et;
TCPDEBUG0; TCPDEBUG0;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL")); KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp); INP_WLOCK(inp);
@ -824,7 +828,7 @@ tcp_usr_shutdown(struct socket *so)
TCPDEBUG2(PRU_SHUTDOWN); TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error); return (error);
} }
@ -887,6 +891,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0; int error = 0;
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct epoch_tracker net_et;
#ifdef INET6 #ifdef INET6
int isipv6; int isipv6;
#endif #endif
@ -897,7 +902,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* this call. * this call.
*/ */
if (flags & PRUS_EOF) if (flags & PRUS_EOF)
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp); INP_WLOCK(inp);
@ -1040,7 +1045,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
if (flags & PRUS_EOF) if (flags & PRUS_EOF)
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
return (error); return (error);
} }
@ -1079,12 +1084,13 @@ tcp_usr_abort(struct socket *so)
{ {
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct epoch_tracker et;
TCPDEBUG0; TCPDEBUG0;
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL, KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL")); ("tcp_usr_abort: inp_socket == NULL"));
@ -1110,7 +1116,7 @@ tcp_usr_abort(struct socket *so)
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
dropped: dropped:
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
/* /*
@ -1121,12 +1127,13 @@ tcp_usr_close(struct socket *so)
{ {
struct inpcb *inp; struct inpcb *inp;
struct tcpcb *tp = NULL; struct tcpcb *tp = NULL;
struct epoch_tracker et;
TCPDEBUG0; TCPDEBUG0;
inp = sotoinpcb(so); inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp); INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL, KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL")); ("tcp_usr_close: inp_socket == NULL"));
@ -1150,7 +1157,7 @@ tcp_usr_close(struct socket *so)
inp->inp_flags |= INP_SOCKREF; inp->inp_flags |= INP_SOCKREF;
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
} }
/* /*
@ -2043,6 +2050,7 @@ tcp_attach(struct socket *so)
{ {
struct tcpcb *tp; struct tcpcb *tp;
struct inpcb *inp; struct inpcb *inp;
struct epoch_tracker et;
int error; int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@ -2052,10 +2060,10 @@ tcp_attach(struct socket *so)
} }
so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE;
INP_INFO_RLOCK(&V_tcbinfo); INP_INFO_RLOCK_ET(&V_tcbinfo, et);
error = in_pcballoc(so, &V_tcbinfo); error = in_pcballoc(so, &V_tcbinfo);
if (error) { if (error) {
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error); return (error);
} }
inp = sotoinpcb(so); inp = sotoinpcb(so);
@ -2073,12 +2081,12 @@ tcp_attach(struct socket *so)
if (tp == NULL) { if (tp == NULL) {
in_pcbdetach(inp); in_pcbdetach(inp);
in_pcbfree(inp); in_pcbfree(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (ENOBUFS); return (ENOBUFS);
} }
tp->t_state = TCPS_CLOSED; tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TCPSTATES_INC(TCPS_CLOSED); TCPSTATES_INC(TCPS_CLOSED);
return (0); return (0);
} }

View File

@ -264,12 +264,11 @@ struct tcp_function_block {
int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *); int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *);
void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, struct socket *, struct tcpcb *,
int, int, uint8_t, int, int, uint8_t);
int);
void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *, void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, struct socket *, struct tcpcb *,
int, int, uint8_t, int, int, uint8_t,
int, int, struct timeval *); int, struct timeval *);
int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
struct inpcb *inp, struct tcpcb *tp); struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */ /* Optional memory allocation/free routine */
@ -862,8 +861,7 @@ int tcp_input(struct mbuf **, int *, int);
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int); struct tcpcb *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *, void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t, struct socket *, struct tcpcb *, int, int, uint8_t);
int);
int register_tcp_functions(struct tcp_function_block *blk, int wait); int register_tcp_functions(struct tcp_function_block *blk, int wait);
int register_tcp_functions_as_names(struct tcp_function_block *blk, int register_tcp_functions_as_names(struct tcp_function_block *blk,

View File

@ -399,6 +399,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct sockaddr_in udp_in[2]; struct sockaddr_in udp_in[2];
struct mbuf *m; struct mbuf *m;
struct m_tag *fwd_tag; struct m_tag *fwd_tag;
struct epoch_tracker et;
int cscov_partial, iphlen; int cscov_partial, iphlen;
m = *mp; m = *mp;
@ -529,7 +530,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist; struct inpcbhead *pcblist;
struct ip_moptions *imo; struct ip_moptions *imo;
INP_INFO_RLOCK(pcbinfo); INP_INFO_RLOCK_ET(pcbinfo, et);
pcblist = udp_get_pcblist(proto); pcblist = udp_get_pcblist(proto);
last = NULL; last = NULL;
CK_LIST_FOREACH(inp, pcblist, inp_list) { CK_LIST_FOREACH(inp, pcblist, inp_list) {
@ -625,14 +626,14 @@ udp_input(struct mbuf **mp, int *offp, int proto)
UDPSTAT_INC(udps_noportbcast); UDPSTAT_INC(udps_noportbcast);
if (inp) if (inp)
INP_RUNLOCK(inp); INP_RUNLOCK(inp);
INP_INFO_RUNLOCK(pcbinfo); INP_INFO_RUNLOCK_ET(pcbinfo, et);
goto badunlocked; goto badunlocked;
} }
UDP_PROBE(receive, NULL, last, ip, last, uh); UDP_PROBE(receive, NULL, last, ip, last, uh);
if (udp_append(last, ip, m, iphlen, udp_in) == 0) if (udp_append(last, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(last); INP_RUNLOCK(last);
inp_lost: inp_lost:
INP_INFO_RUNLOCK(pcbinfo); INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE); return (IPPROTO_DONE);
} }
@ -839,6 +840,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
struct inpcb *inp, **inp_list; struct inpcb *inp, **inp_list;
inp_gen_t gencnt; inp_gen_t gencnt;
struct xinpgen xig; struct xinpgen xig;
struct epoch_tracker et;
/* /*
* The process of preparing the PCB list is too time-consuming and * The process of preparing the PCB list is too time-consuming and
@ -857,10 +859,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
/* /*
* OK, now we're committed to doing something. * OK, now we're committed to doing something.
*/ */
INP_INFO_RLOCK(&V_udbinfo); INP_INFO_RLOCK_ET(&V_udbinfo, et);
gencnt = V_udbinfo.ipi_gencnt; gencnt = V_udbinfo.ipi_gencnt;
n = V_udbinfo.ipi_count; n = V_udbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_udbinfo); INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ n * sizeof(struct xinpcb)); + n * sizeof(struct xinpcb));
@ -879,7 +881,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
if (inp_list == NULL) if (inp_list == NULL)
return (ENOMEM); return (ENOMEM);
INP_INFO_RLOCK(&V_udbinfo); INP_INFO_RLOCK_ET(&V_udbinfo, et);
for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) { inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp); INP_WLOCK(inp);
@ -890,7 +892,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
} }
INP_WUNLOCK(inp); INP_WUNLOCK(inp);
} }
INP_INFO_RUNLOCK(&V_udbinfo); INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
n = i; n = i;
error = 0; error = 0;
@ -922,11 +924,11 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this * that something happened while we were processing this
* request, and it might be necessary to retry. * request, and it might be necessary to retry.
*/ */
INP_INFO_RLOCK(&V_udbinfo); INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt; xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count; xig.xig_count = V_udbinfo.ipi_count;
INP_INFO_RUNLOCK(&V_udbinfo); INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig); error = SYSCTL_OUT(req, &xig, sizeof xig);
} }
free(inp_list, M_TEMP); free(inp_list, M_TEMP);
@ -1108,6 +1110,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct cmsghdr *cm; struct cmsghdr *cm;
struct inpcbinfo *pcbinfo; struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin, src; struct sockaddr_in *sin, src;
struct epoch_tracker et;
int cscov_partial = 0; int cscov_partial = 0;
int error = 0; int error = 0;
int ipflags; int ipflags;
@ -1264,7 +1267,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
(inp->inp_laddr.s_addr == INADDR_ANY) || (inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) || (inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) { (src.sin_family == AF_INET)) {
INP_HASH_RLOCK(pcbinfo); INP_HASH_RLOCK_ET(pcbinfo, et);
unlock_udbinfo = UH_RLOCKED; unlock_udbinfo = UH_RLOCKED;
} else } else
unlock_udbinfo = UH_UNLOCKED; unlock_udbinfo = UH_UNLOCKED;
@ -1520,7 +1523,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
if (unlock_udbinfo == UH_WLOCKED) if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo); INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED) else if (unlock_udbinfo == UH_RLOCKED)
INP_HASH_RUNLOCK(pcbinfo); INP_HASH_RUNLOCK_ET(pcbinfo, et);
UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options, error = ip_output(m, inp->inp_options,
(unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags, (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
@ -1540,7 +1543,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
} else if (unlock_udbinfo == UH_RLOCKED) { } else if (unlock_udbinfo == UH_RLOCKED) {
KASSERT(unlock_inp == UH_RLOCKED, KASSERT(unlock_inp == UH_RLOCKED,
("%s: shared udbinfo lock, excl inp lock", __func__)); ("%s: shared udbinfo lock, excl inp lock", __func__));
INP_HASH_RUNLOCK(pcbinfo); INP_HASH_RUNLOCK_ET(pcbinfo, et);
INP_RUNLOCK(inp); INP_RUNLOCK(inp);
} else if (unlock_inp == UH_WLOCKED) } else if (unlock_inp == UH_WLOCKED)
INP_WUNLOCK(inp); INP_WUNLOCK(inp);

View File

@ -1896,6 +1896,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
struct inpcb *last = NULL; struct inpcb *last = NULL;
struct sockaddr_in6 fromsa; struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6; struct icmp6_hdr *icmp6;
struct epoch_tracker et;
struct mbuf *opts = NULL; struct mbuf *opts = NULL;
#ifndef PULLDOWN_TEST #ifndef PULLDOWN_TEST
@ -1922,7 +1923,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
return (IPPROTO_DONE); return (IPPROTO_DONE);
} }
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
if ((in6p->inp_vflag & INP_IPV6) == 0) if ((in6p->inp_vflag & INP_IPV6) == 0)
continue; continue;
@ -2000,7 +2001,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
} }
last = in6p; last = in6p;
} }
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) { if (last != NULL) {
if (last->inp_flags & INP_CONTROLOPTS) if (last->inp_flags & INP_CONTROLOPTS)
ip6_savecontrol(last, m, &opts); ip6_savecontrol(last, m, &opts);

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h> #include <sys/syslog.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/proc.h>
#include <net/ethernet.h> #include <net/ethernet.h>
#include <net/if.h> #include <net/if.h>
@ -241,7 +242,7 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
int len; int len;
/* prepend new IP header */ /* prepend new IP header */
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip6_hdr); len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT #ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP) if (proto == IPPROTO_ETHERIP)
@ -283,7 +284,7 @@ in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
struct ip6_hdr *ip6; struct ip6_hdr *ip6;
uint8_t ecn; uint8_t ecn;
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) { if (sc == NULL) {
m_freem(m); m_freem(m);
IP6STAT_INC(ip6s_nogif); IP6STAT_INC(ip6s_nogif);
@ -312,7 +313,7 @@ in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
if (V_ipv6_hashtbl == NULL) if (V_ipv6_hashtbl == NULL)
return (0); return (0);
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
/* /*
* NOTE: it is safe to iterate without any locking here, because softc * NOTE: it is safe to iterate without any locking here, because softc
* can be reclaimed only when we are not within net_epoch_preempt * can be reclaimed only when we are not within net_epoch_preempt

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/proc.h>
#include <net/if.h> #include <net/if.h>
#include <net/if_var.h> #include <net/if_var.h>
@ -110,7 +111,7 @@ in6_gre_lookup(const struct mbuf *m, int off, int proto, void **arg)
if (V_ipv6_hashtbl == NULL) if (V_ipv6_hashtbl == NULL)
return (0); return (0);
MPASS(in_epoch()); MPASS(in_epoch(net_epoch_preempt));
ip6 = mtod(m, const struct ip6_hdr *); ip6 = mtod(m, const struct ip6_hdr *);
CK_LIST_FOREACH(sc, &GRE_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { CK_LIST_FOREACH(sc, &GRE_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) {
/* /*

View File

@ -165,6 +165,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct inpcb *last = NULL; struct inpcb *last = NULL;
struct mbuf *opts = NULL; struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa; struct sockaddr_in6 fromsa;
struct epoch_tracker et;
RIP6STAT_INC(rip6s_ipackets); RIP6STAT_INC(rip6s_ipackets);
@ -172,7 +173,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif; ifp = m->m_pkthdr.rcvif;
INP_INFO_RLOCK(&V_ripcbinfo); INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
/* XXX inp locking */ /* XXX inp locking */
if ((in6p->inp_vflag & INP_IPV6) == 0) if ((in6p->inp_vflag & INP_IPV6) == 0)
@ -291,7 +292,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
} }
last = in6p; last = in6p;
} }
INP_INFO_RUNLOCK(&V_ripcbinfo); INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT) #if defined(IPSEC) || defined(IPSEC_SUPPORT)
/* /*
* Check AH/ESP integrity. * Check AH/ESP integrity.

View File

@ -214,6 +214,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int off = *offp; int off = *offp;
int cscov_partial; int cscov_partial;
int plen, ulen; int plen, ulen;
struct epoch_tracker et;
struct sockaddr_in6 fromsa[2]; struct sockaddr_in6 fromsa[2];
struct m_tag *fwd_tag; struct m_tag *fwd_tag;
uint16_t uh_sum; uint16_t uh_sum;
@ -300,7 +301,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist; struct inpcbhead *pcblist;
struct ip6_moptions *imo; struct ip6_moptions *imo;
INP_INFO_RLOCK(pcbinfo); INP_INFO_RLOCK_ET(pcbinfo, et);
/* /*
* In the event that laddr should be set to the link-local * In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address * address (this happens in RIPng), the multicast address
@ -426,7 +427,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
INP_RUNLOCK(last); INP_RUNLOCK(last);
} else } else
INP_RUNLOCK(last); INP_RUNLOCK(last);
INP_INFO_RUNLOCK(pcbinfo); INP_INFO_RUNLOCK_ET(pcbinfo, et);
inp_lost: inp_lost:
return (IPPROTO_DONE); return (IPPROTO_DONE);
} }
@ -508,7 +509,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE); return (IPPROTO_DONE);
badheadlocked: badheadlocked:
INP_INFO_RUNLOCK(pcbinfo); INP_INFO_RUNLOCK_ET(pcbinfo, et);
badunlocked: badunlocked:
if (m) if (m)
m_freem(m); m_freem(m);

View File

@ -31,10 +31,9 @@
#define _SYS_EPOCH_H_ #define _SYS_EPOCH_H_
#ifdef _KERNEL #ifdef _KERNEL
#include <sys/lock.h> #include <sys/lock.h>
#include <sys/proc.h> #include <sys/pcpu.h>
#endif #endif
struct thread;
struct epoch; struct epoch;
typedef struct epoch *epoch_t; typedef struct epoch *epoch_t;
@ -46,48 +45,49 @@ extern epoch_t global_epoch_preempt;
struct epoch_context { struct epoch_context {
void *data[2]; void *data[2];
} __aligned(sizeof(void *)); } __aligned(sizeof(void *));
typedef struct epoch_context *epoch_context_t; typedef struct epoch_context *epoch_context_t;
struct epoch_tracker {
void *datap[3];
#ifdef INVARIANTS
int datai[5];
#else
int datai[1];
#endif
} __aligned(sizeof(void *));
typedef struct epoch_tracker *epoch_tracker_t;
epoch_t epoch_alloc(int flags); epoch_t epoch_alloc(int flags);
void epoch_free(epoch_t epoch); void epoch_free(epoch_t epoch);
void epoch_enter(epoch_t epoch);
void epoch_enter_preempt_internal(epoch_t epoch, struct thread *td);
void epoch_exit(epoch_t epoch);
void epoch_exit_preempt_internal(epoch_t epoch, struct thread *td);
void epoch_wait(epoch_t epoch); void epoch_wait(epoch_t epoch);
void epoch_wait_preempt(epoch_t epoch); void epoch_wait_preempt(epoch_t epoch);
void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)); void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t));
int in_epoch(void); int in_epoch(epoch_t epoch);
int in_epoch_verbose(epoch_t epoch, int dump_onfail);
#ifdef _KERNEL #ifdef _KERNEL
DPCPU_DECLARE(int, epoch_cb_count); DPCPU_DECLARE(int, epoch_cb_count);
DPCPU_DECLARE(struct grouptask, epoch_cb_task); DPCPU_DECLARE(struct grouptask, epoch_cb_task);
#define EPOCH_MAGIC0 0xFADECAFEF00DD00D
#define EPOCH_MAGIC1 0xBADDBABEDEEDFEED
static __inline void void epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
epoch_enter_preempt(epoch_t epoch) void epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
{ void epoch_enter_KBI(epoch_t epoch);
struct thread *td; void epoch_exit_KBI(epoch_t epoch);
int nesting __unused;
td = curthread;
nesting = td->td_epochnest++; #if defined(KLD_MODULE) && !defined(KLD_TIED)
#ifndef INVARIANTS #define epoch_enter_preempt(e, t) epoch_enter_preempt_KBI((e), (t))
if (nesting == 0) #define epoch_exit_preempt(e, t) epoch_exit_preempt_KBI((e), (t))
#endif #define epoch_enter(e) epoch_enter_KBI((e))
epoch_enter_preempt_internal(epoch, td); #define epoch_exit(e) epoch_exit_KBI((e))
} #else
#include <sys/epoch_private.h>
static __inline void #endif /* KLD_MODULE */
epoch_exit_preempt(epoch_t epoch)
{ #endif /* _KERNEL */
struct thread *td;
td = curthread;
MPASS(td->td_epochnest);
if (td->td_epochnest-- == 1)
epoch_exit_preempt_internal(epoch, td);
}
#endif /* _KERNEL */
#endif #endif

203
sys/sys/epoch_private.h Normal file
View File

@ -0,0 +1,203 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_EPOCH_PRIVATE_H_
#define _SYS_EPOCH_PRIVATE_H_
#ifndef _KERNEL
#error "no user serviceable parts"
#else
#include <ck_epoch.h>
#include <sys/kpilite.h>
#include <sys/mutex.h>
/*
 * Helpers implemented outside this header.
 *
 * epoch_adjust_prio() is called by epoch_exit_preempt() when the thread's
 * priority no longer matches the value saved on entry.
 *
 * critical_exit_preempt() is called by critical_exit_sa() when the thread
 * has td_owepreempt set.  The declaration is skipped when <sys/systm.h>
 * has already been included (presumably because it declares the same
 * function -- confirm).
 */
extern void epoch_adjust_prio(struct thread *td, u_char prio);
#ifndef _SYS_SYSTM_H_
extern void critical_exit_preempt(void);
#endif
/*
 * Alignment applied (via __aligned) to struct epoch_record and to the
 * leading members of struct epoch.  amd64 uses two cache lines
 * (presumably to account for adjacent-line prefetch -- confirm).
 *
 * The expansion is parenthesized so the macro evaluates as a single value
 * when it appears inside a larger expression.
 */
#ifdef __amd64__
#define EPOCH_ALIGN (CACHE_LINE_SIZE * 2)
#else
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif
/*
* Standalone (_sa) routines for thread state manipulation
*/
/*
 * Enter a critical section on the thread referenced by tdarg: bump its
 * td_critnest counter, then issue a compiler barrier so the increment is
 * not reordered by the compiler past the code that follows.
 */
static __inline void
critical_enter_sa(void *tdarg)
{
	struct thread_lite *tl = tdarg;

	tl->td_critnest += 1;
	__compiler_membar();
}
/*
 * Leave a critical section on the thread referenced by tdarg.  The
 * td_critnest decrement is bracketed by compiler barriers; once it has
 * been dropped, a pending preemption (td_owepreempt != 0) is handed to
 * critical_exit_preempt().
 */
static __inline void
critical_exit_sa(void *tdarg)
{
	struct thread_lite *tl = tdarg;

	MPASS(tl->td_critnest > 0);
	__compiler_membar();
	tl->td_critnest -= 1;
	__compiler_membar();
	if (__predict_false(tl->td_owepreempt != 0))
		critical_exit_preempt();
}
/*
 * Per-entry state for a preemptible epoch section.  epoch_enter_preempt()
 * overlays this structure on the caller-supplied struct epoch_tracker and
 * links it onto the per-CPU record's er_tdlist; epoch_exit_preempt()
 * unlinks it again.
 *
 * NOTE(review): the overlay assumes struct epoch_tracker is at least as
 * large as this structure -- confirm a size assertion exists elsewhere.
 *
 * The magic fields exist only under INVARIANTS; they are stamped on entry
 * and checked on exit.
 */
typedef struct epoch_thread {
#ifdef INVARIANTS
uint64_t et_magic_pre;
#endif
TAILQ_ENTRY(epoch_thread) et_link; /* Epoch queue. */
struct thread *et_td; /* pointer to thread in section */
ck_epoch_section_t et_section; /* epoch section object */
#ifdef INVARIANTS
uint64_t et_magic_post;
#endif
} *epoch_thread_t;
/* List head type used for epoch_record.er_tdlist. */
TAILQ_HEAD (epoch_tdlist, epoch_thread);
/*
 * Per-CPU epoch state, reached through epoch->e_pcpu[curcpu].
 *
 * er_record - ck_epoch record passed to ck_epoch_begin()/ck_epoch_end().
 * er_tdlist - epoch_thread entries currently inside a preemptible section
 *             on this record.
 * er_gen    - incremented by epoch_exit_preempt() after an entry is
 *             removed from er_tdlist.
 * er_cpuid  - presumably the CPU this record belongs to (not used in this
 *             header -- confirm against the allocator).
 */
typedef struct epoch_record {
ck_epoch_record_t er_record;
volatile struct epoch_tdlist er_tdlist;
volatile uint32_t er_gen;
uint32_t er_cpuid;
} __aligned(EPOCH_ALIGN) *epoch_record_t;
/*
 * An epoch instance.
 *
 * e_epoch    - underlying ck_epoch object.
 * e_pcpu_dom - one record pointer per memory domain slot (MAXMEMDOM
 *              entries); presumably the per-domain backing allocations --
 *              confirm against the allocator.
 * e_idx      - not used in this header; meaning not visible here.
 * e_flags    - tested against EPOCH_PREEMPT by the preemptible
 *              enter/exit paths.
 * e_pcpu     - trailing variable-length array of per-CPU records, indexed
 *              by curcpu in the enter/exit routines.
 */
struct epoch {
struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
struct epoch_record *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
int e_idx;
int e_flags;
struct epoch_record *e_pcpu[0];
};
/*
 * Bail out of the enclosing function when the epoch pointer is NULL.
 *
 * Note the hidden control flow: this expands to a bare "return;", so it
 * may only be used inside functions returning void.
 */
#define INIT_CHECK(epoch) \
do { \
if (__predict_false((epoch) == NULL)) \
return; \
} while (0)
/*
 * Enter a preemptible epoch section.
 *
 * epoch - epoch to enter; a NULL epoch makes this a no-op (INIT_CHECK),
 *         which the assertion below only tolerates while "cold" is set.
 * et    - caller-supplied tracker; it is reinterpreted as a struct
 *         epoch_thread and linked onto the current CPU's record, so it
 *         must stay valid until the matching epoch_exit_preempt().
 */
static __inline void
epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct epoch_thread *etd;
struct thread_lite *td;
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
/* The opaque tracker is the storage for this entry's epoch_thread. */
etd = (void *)et;
#ifdef INVARIANTS
MPASS(epoch->e_flags & EPOCH_PREEMPT);
/* Stamp the tracker; epoch_exit_preempt() verifies both values. */
etd->et_magic_pre = EPOCH_MAGIC0;
etd->et_magic_post = EPOCH_MAGIC1;
#endif
td = (struct thread_lite *)curthread;
etd->et_td = (void*)td;
td->td_epochnest++;
/*
 * The per-CPU record is selected and updated inside a critical section.
 * sched_pin_lite() is called here and its counterpart only in
 * epoch_exit_preempt(), so the pin outlives the critical section
 * (presumably keeping the thread on this CPU for the whole epoch
 * section -- confirm).
 */
critical_enter_sa(td);
sched_pin_lite(td);
/* Remember the entry priority so exit can detect a change. */
td->td_pre_epoch_prio = td->td_priority;
er = epoch->e_pcpu[curcpu];
TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link);
ck_epoch_begin(&er->er_record, (ck_epoch_section_t *)&etd->et_section);
critical_exit_sa(td);
}
/*
 * Enter a non-preemptible epoch section.
 *
 * A NULL epoch makes this a no-op (INIT_CHECK).  Otherwise the thread's
 * td_epochnest is bumped, a critical section is entered, and the current
 * CPU's ck_epoch record is marked active.  The critical section is left
 * open on return; epoch_exit() closes it.
 */
static __inline void
epoch_enter(epoch_t epoch)
{
	struct thread_lite *tl;

	MPASS(cold || epoch != NULL);
	INIT_CHECK(epoch);
	tl = (struct thread_lite *)curthread;
	tl->td_epochnest += 1;
	critical_enter_sa(tl);
	ck_epoch_begin(&epoch->e_pcpu[curcpu]->er_record, NULL);
}
/*
 * Leave a preemptible epoch section previously entered with
 * epoch_enter_preempt().
 *
 * epoch - epoch being left; a NULL epoch makes this a no-op (INIT_CHECK).
 * et    - the tracker that was passed to the matching
 *         epoch_enter_preempt(); it is unlinked from the per-CPU record.
 */
static __inline void
epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct epoch_thread *etd;
struct thread_lite *td;
INIT_CHECK(epoch);
td = (struct thread_lite *)curthread;
/* All record manipulation below happens inside a critical section. */
critical_enter_sa(td);
/* Counterpart of the sched_pin_lite() done on entry. */
sched_unpin_lite(td);
MPASS(td->td_epochnest);
td->td_epochnest--;
er = epoch->e_pcpu[curcpu];
MPASS(epoch->e_flags & EPOCH_PREEMPT);
etd = (void *)et;
#ifdef INVARIANTS
/*
 * Verify the tracker is the one stamped by epoch_enter_preempt() for
 * this thread, then poison it so a second exit with the same tracker
 * trips these assertions.
 */
MPASS(etd != NULL);
MPASS(etd->et_td == (struct thread *)td);
MPASS(etd->et_magic_pre == EPOCH_MAGIC0);
MPASS(etd->et_magic_post == EPOCH_MAGIC1);
etd->et_magic_pre = 0;
etd->et_magic_post = 0;
etd->et_td = (void*)0xDEADBEEF;
#endif
ck_epoch_end(&er->er_record,
(ck_epoch_section_t *)&etd->et_section);
TAILQ_REMOVE(&er->er_tdlist, etd, et_link);
/* Bump the record's generation after the list changes. */
er->er_gen++;
/* Restore the priority saved on entry if it changed in the meantime. */
if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
epoch_adjust_prio((struct thread *)td, td->td_pre_epoch_prio);
critical_exit_sa(td);
}
/*
 * Leave a non-preemptible epoch section.
 *
 * A NULL epoch makes this a no-op (INIT_CHECK).  Otherwise td_epochnest
 * is dropped, the current CPU's ck_epoch record is marked inactive, and
 * the critical section opened by epoch_enter() is closed.
 */
static __inline void
epoch_exit(epoch_t epoch)
{
	struct thread_lite *tl;

	INIT_CHECK(epoch);
	tl = (struct thread_lite *)curthread;
	MPASS(tl->td_epochnest);
	tl->td_epochnest -= 1;
	ck_epoch_end(&epoch->e_pcpu[curcpu]->er_record, NULL);
	critical_exit_sa(tl);
}
#endif /* _KERNEL */
#endif /* _SYS_EPOCH_PRIVATE_H_ */

View File

@ -201,11 +201,12 @@ extern struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
/* Hook invocation; for use within the kernel */ /* Hook invocation; for use within the kernel */
#define PMC_CALL_HOOK(t, cmd, arg) \ #define PMC_CALL_HOOK(t, cmd, arg) \
do { \ do { \
epoch_enter_preempt(global_epoch_preempt); \ struct epoch_tracker et; \
epoch_enter_preempt(global_epoch_preempt, &et); \
if (pmc_hook != NULL) \ if (pmc_hook != NULL) \
(pmc_hook)((t), (cmd), (arg)); \ (pmc_hook)((t), (cmd), (arg)); \
epoch_exit_preempt(global_epoch_preempt); \ epoch_exit_preempt(global_epoch_preempt, &et); \
} while (0) } while (0)
/* Hook invocation that needs an exclusive lock */ /* Hook invocation that needs an exclusive lock */

View File

@ -74,19 +74,6 @@
#include <machine/cpu.h> #include <machine/cpu.h>
#endif #endif
/*
* A section object may be passed to every begin-end pair to allow for
* forward progress guarantees with-in prolonged active sections.
*
* We can't include ck_epoch.h so we define our own variant here and
* then CTASSERT that it's the same size in subr_epoch.c
*/
struct epoch_section {
unsigned int bucket;
};
typedef struct epoch_section epoch_section_t;
/* /*
* One structure allocated per session. * One structure allocated per session.
* *
@ -373,8 +360,6 @@ struct thread {
int td_lastcpu; /* (t) Last cpu we were on. */ int td_lastcpu; /* (t) Last cpu we were on. */
int td_oncpu; /* (t) Which cpu we are on. */ int td_oncpu; /* (t) Which cpu we are on. */
void *td_lkpi_task; /* LinuxKPI task struct pointer */ void *td_lkpi_task; /* LinuxKPI task struct pointer */
TAILQ_ENTRY(thread) td_epochq; /* (t) Epoch queue. */
epoch_section_t td_epoch_section; /* (t) epoch section object */
int td_pmcpend; int td_pmcpend;
}; };