ip(6)_freemoptions: defer imo destruction to epoch callback task

Avoid the ugly unlock / relock of the inpcbinfo lock, which required
figuring out what kind of lock we held, by simply deferring the
operation to another context. (This is also a small prerequisite for
converting the pcbinfo read lock to epoch.)
This commit is contained in:
Matt Macy 2018-05-20 00:22:28 +00:00
parent 23d123c6cf
commit cb6bb2303e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=333905
6 changed files with 44 additions and 80 deletions

View File

@ -165,8 +165,6 @@ static void inm_reap(struct in_multi *);
static void inm_release(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
static void inp_freemoptions_internal(struct ip_moptions *);
static void inp_gcmoptions(void *, int);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
@ -199,10 +197,6 @@ static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
static STAILQ_HEAD(, ip_moptions) imo_gc_list =
STAILQ_HEAD_INITIALIZER(imo_gc_list);
static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
#ifdef KTR
/*
* Inline function which wraps assertions for a valid ifp.
@ -1665,46 +1659,15 @@ inp_findmoptions(struct inpcb *inp)
return (imo);
}
/*
* Discard the IP multicast options (and source filters). To minimize
* the amount of work done while holding locks such as the INP's
* pcbinfo lock (which is used in the receive path), the free
* operation is performed asynchronously in a separate task.
*
* SMPng: NOTE: assumes INP write lock is held.
*/
void
inp_freemoptions(struct ip_moptions *imo, struct inpcbinfo *pcbinfo)
{
int wlock;
if (imo == NULL)
return;
INP_INFO_LOCK_ASSERT(pcbinfo);
wlock = INP_INFO_WLOCKED(pcbinfo);
if (wlock)
INP_INFO_WUNLOCK(pcbinfo);
else
INP_INFO_RUNLOCK(pcbinfo);
KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
IN_MULTI_LIST_LOCK();
STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
IN_MULTI_LIST_UNLOCK();
taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
if (wlock)
INP_INFO_WLOCK(pcbinfo);
else
INP_INFO_RLOCK(pcbinfo);
}
static void
inp_freemoptions_internal(struct ip_moptions *imo)
inp_gcmoptions(epoch_context_t ctx)
{
struct ip_moptions *imo;
struct in_mfilter *imf;
size_t idx, nmships;
imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx);
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@ -1721,20 +1684,18 @@ inp_freemoptions_internal(struct ip_moptions *imo)
free(imo, M_IPMOPTS);
}
static void
inp_gcmoptions(void *context, int pending)
/*
* Discard the IP multicast options (and source filters). To minimize
* the amount of work done while holding locks such as the INP's
* pcbinfo lock (which is used in the receive path), the free
* operation is deferred to the epoch callback task.
*/
void
inp_freemoptions(struct ip_moptions *imo)
{
struct ip_moptions *imo;
IN_MULTI_LIST_LOCK();
while (!STAILQ_EMPTY(&imo_gc_list)) {
imo = STAILQ_FIRST(&imo_gc_list);
STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
IN_MULTI_LIST_UNLOCK();
inp_freemoptions_internal(imo);
IN_MULTI_LIST_LOCK();
}
IN_MULTI_LIST_UNLOCK();
if (imo == NULL)
return;
epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions);
}
/*

View File

@ -1381,19 +1381,15 @@ in_pcbfree(struct inpcb *inp)
crfree(inp->inp_cred);
#ifdef MAC
mac_inpcb_destroy(inp);
#endif
if (!in_pcbrele_wlocked(inp))
INP_WUNLOCK(inp);
#if defined(INET) && defined(INET6)
if (imo == NULL && im6o == NULL)
return;
#endif
#ifdef INET6
ip6_freemoptions(im6o, pcbinfo);
ip6_freemoptions(im6o);
#endif
#ifdef INET
inp_freemoptions(imo, pcbinfo);
inp_freemoptions(imo);
#endif
if (!in_pcbrele_wlocked(inp))
INP_WUNLOCK(inp);
}
/*
@ -1545,6 +1541,8 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
/*
* Drop multicast group membership if we joined
* through the interface being detached.
*
* XXX This can all be deferred to an epoch_call
*/
for (i = 0, gap = 0; i < imo->imo_num_memberships;
i++) {

View File

@ -36,6 +36,7 @@
#define _NETINET_IP_VAR_H_
#include <sys/queue.h>
#include <sys/epoch.h>
/*
* Overlay for ip header used by other protocols (tcp, udp).
@ -95,7 +96,7 @@ struct ip_moptions {
u_short imo_max_memberships; /* max memberships this socket */
struct in_multi **imo_membership; /* group memberships */
struct in_mfilter *imo_mfilters; /* source filters */
STAILQ_ENTRY(ip_moptions) imo_link;
struct epoch_context imo_epoch_ctx;
};
struct ipstat {
@ -202,7 +203,7 @@ extern struct pr_usrreqs rip_usrreqs;
#define V_rsvp_on VNET(rsvp_on)
#define V_drop_redirect VNET(drop_redirect)
void inp_freemoptions(struct ip_moptions *, struct inpcbinfo *);
void inp_freemoptions(struct ip_moptions *);
int inp_getmoptions(struct inpcb *, struct sockopt *);
int inp_setmoptions(struct inpcb *, struct sockopt *);

View File

@ -1616,22 +1616,19 @@ in6p_findmoptions(struct inpcb *inp)
* Discard the IPv6 multicast options (and source filters).
*
* SMPng: NOTE: assumes INP write lock is held.
*
* XXX can all be safely deferred to epoch_call
*
*/
void
ip6_freemoptions(struct ip6_moptions *imo, struct inpcbinfo *pcbinfo)
static void
inp_gcmoptions(epoch_context_t ctx)
{
struct ip6_moptions *imo;
struct in6_mfilter *imf;
size_t idx, nmships;
int wlock;
if (imo == NULL)
return;
INP_INFO_LOCK_ASSERT(pcbinfo);
wlock = INP_INFO_WLOCKED(pcbinfo);
if (wlock)
INP_INFO_WUNLOCK(pcbinfo);
else
INP_INFO_RUNLOCK(pcbinfo);
imo = __containerof(ctx, struct ip6_moptions, imo6_epoch_ctx);
nmships = imo->im6o_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
@ -1648,10 +1645,14 @@ ip6_freemoptions(struct ip6_moptions *imo, struct inpcbinfo *pcbinfo)
free(imo->im6o_mfilters, M_IN6MFILTER);
free(imo->im6o_membership, M_IP6MOPTS);
free(imo, M_IP6MOPTS);
if (wlock)
INP_INFO_WLOCK(pcbinfo);
else
INP_INFO_RLOCK(pcbinfo);
}
/*
 * Schedule an IPv6 multicast options structure for destruction.
 *
 * A NULL imo is a no-op. Otherwise the structure's embedded
 * imo6_epoch_ctx is passed to epoch_call() on net_epoch_preempt with
 * inp_gcmoptions() as the callback; the per-membership teardown and the
 * free() calls live in that callback rather than in this function.
 *
 * NOTE(review): presumably the caller must not touch imo after this
 * returns -- confirm against epoch_call(9) semantics.
 */
void
ip6_freemoptions(struct ip6_moptions *imo)
{
if (imo == NULL)
return;
/* Hand off to the epoch callback; no free() is issued directly here. */
epoch_call(net_epoch_preempt, &imo->imo6_epoch_ctx, inp_gcmoptions);
}
/*

View File

@ -810,7 +810,7 @@ void in6m_print(const struct in6_multi *);
int in6m_record_source(struct in6_multi *, const struct in6_addr *);
void in6m_release_deferred(struct in6_multi *);
void in6m_release_list_deferred(struct in6_multi_head *);
void ip6_freemoptions(struct ip6_moptions *, struct inpcbinfo *);
void ip6_freemoptions(struct ip6_moptions *);
int ip6_getmoptions(struct inpcb *, struct sockopt *);
int ip6_setmoptions(struct inpcb *, struct sockopt *);

View File

@ -66,6 +66,8 @@
#ifndef _NETINET6_IP6_VAR_H_
#define _NETINET6_IP6_VAR_H_
#include <sys/epoch.h>
/*
* IP6 reassembly queue structure. Each fragment
* being reassembled is attached to one of these structures.
@ -121,6 +123,7 @@ struct ip6_moptions {
u_short im6o_max_memberships; /* max memberships this socket */
struct in6_multi **im6o_membership; /* group memberships */
struct in6_mfilter *im6o_mfilters; /* source filters */
struct epoch_context imo6_epoch_ctx;
};
/*