support thread migration and refactor kqdom locks

Oscar Zhao 2021-05-21 14:38:08 -04:00
parent 0aa6e4d988
commit 49b76286b6
2 changed files with 103 additions and 42 deletions
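
This commit replaces the per-kqdom rwlock with a single kqueue-wide kqd_lock and teaches kqueue_scan() to re-home an active kevq when its servicing thread migrates: the new kevq_update_kqdom() moves the kevq to the leaf kqdom covering the thread's current CPU. kqdom_insert() and kqdom_remove() now assert that the caller write-holds the kqueue-wide lock, and the new kqdom_insert_ul()/kqdom_remove_ul() wrappers acquire it themselves. The lock order is the kqdom lock first, then the kevq mutex. A minimal sketch of that ordering as a caller sees it (migrate_kevq() is a hypothetical name for illustration, not part of this commit):

    /* Sketch only; every identifier except migrate_kevq() appears in the diff below. */
    static void
    migrate_kevq(struct kevq *kevq, int cpu)
    {
        struct kqdom *leaf;

        KQD_WLOCK(kevq->kq);                        /* 1: kqueue-wide kqdom rwlock */
        KEVQ_LOCK(kevq);                            /* 2: per-kevq mutex */
        leaf = kqdom_find(kevq->kq->kq_kqd, cpu);   /* leaf kqdom covering this CPU */
        kqdom_remove(kevq->kevq_kqd, kevq);         /* insert/remove assert KQD_WOWNED() */
        kqdom_insert(leaf, kevq);
        kevq->kevq_kqd = leaf;
        KEVQ_UNLOCK(kevq);
        KQD_WUNLOCK(kevq->kq);
    }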

View File

@@ -195,6 +195,7 @@ static int kevq_total_knote(struct kevq *kevq);
 static int kevq_avail_knote(struct kevq *kevq);
 static void kevq_insert_head_knote(struct kevq *kevq, struct knote *kn);
 static void knote_enqueue_head(struct knote *kn, struct kevq *kevq);
+static void kevq_update_kqdom(struct kevq *kevq, struct thread *td);
 static int kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, struct kevq **kevq);
 static void kqueue_ensure_kqdom(struct kqueue *kq);
@@ -384,25 +385,24 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
 #define KEVQ_NOTOWNED(kevq) do { \
     mtx_assert(&(kevq)->lock, MA_NOTOWNED); \
 } while (0)
-#define KQD_ROWNED(kqd) do { \
-    rw_assert(&(kqd)->kqd_lock, RA_RLOCKED); \
-} while (0)
-#define KQD_WOWNED(kqd) do { \
-    rw_assert(&(kqd)->kqd_lock, RA_WLOCKED); \
-} while (0)
-#define KQD_RLOCK(kqd) do { \
-    rw_rlock(&kqd->kqd_lock); \
-} while (0)
-#define KQD_WLOCK(kqd) do { \
-    rw_wlock(&kqd->kqd_lock); \
-} while (0)
-#define KQD_WUNLOCK(kqd) do { \
-    rw_wunlock(&kqd->kqd_lock); \
-} while (0)
-#define KQD_RUNLOCK(kqd) do { \
-    rw_runlock(&kqd->kqd_lock); \
-} while (0)
+#define KQD_ROWNED(kq) do { \
+    rw_assert(&kq->kqd_lock, RA_RLOCKED); \
+} while (0)
+#define KQD_WOWNED(kq) do { \
+    rw_assert(&kq->kqd_lock, RA_WLOCKED); \
+} while (0)
+#define KQD_RLOCK(kq) do { \
+    rw_rlock(&kq->kqd_lock); \
+} while (0)
+#define KQD_WLOCK(kq) do { \
+    rw_wlock(&kq->kqd_lock); \
+} while (0)
+#define KQD_WUNLOCK(kq) do { \
+    rw_wunlock(&kq->kqd_lock); \
+} while (0)
+#define KQD_RUNLOCK(kq) do { \
+    rw_runlock(&kq->kqd_lock); \
+} while (0)
 #define KVLST_ROWNED(kq) do { \
     rw_assert(&(kq)->kevq_vlist_lk, RA_RLOCKED); \
 } while (0)
@@ -1359,6 +1359,7 @@ kqueue_init(struct kqueue *kq)
     /* XXX: move these guys to init only when needed */
     veclist_init(&kq->kevq_vlist, 0, M_KQUEUE);
     rw_init(&kq->kevq_vlist_lk, "kevq_vlist_lk");
+    rw_init(&kq->kqd_lock, "kqdom_lock");
     kqueue_ensure_kqdom(kq);
     kq->kq_rtshare = KQ_RTSHARE_DEFAULT;
@@ -1676,6 +1677,9 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
         }
     }
+    /* adjust kevq kqdom */
     nerrors = 0;
     while (nchanges > 0) {
         n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
@@ -2359,6 +2363,34 @@ kqueue_ensure_kqdom(struct kqueue *kq)
     }
 }

+static void
+kevq_update_kqdom(struct kevq *kevq, struct thread *td)
+{
+    struct kqdom *kqd;
+
+    KEVQ_NOTOWNED(kevq);
+
+    if (CPU_ISSET(td->td_oncpu, &kevq->kevq_kqd->cpu_mask)) {
+        /* Quick fail. Strictly not required, since only one kthread
+         * accesses a kevq at a time. */
+        return;
+    }
+
+    /* otherwise, assume the thread has migrated */
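+    /* Lock order: kqueue-wide kqdom lock first, then the kevq lock
+     * (same order as kevq_activate()). */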
+    KQD_WLOCK(kevq->kq);
+    KEVQ_LOCK(kevq);
+    if (!CPU_ISSET(td->td_oncpu, &kevq->kevq_kqd->cpu_mask)) {
+        CTR3(KTR_KQ, "kevq_update_kqdom: kevq: %p old cpu: %d new cpu: %d",
+            kevq, CPU_FFS(&kevq->kevq_kqd->cpu_mask), td->td_oncpu);
+        kqd = kqdom_find(kevq->kq->kq_kqd, td->td_oncpu);
+        kqdom_remove(kevq->kevq_kqd, kevq);
+        kqdom_insert(kqd, kevq);
+        kevq->kevq_kqd = kqd;
+    }
+    KEVQ_UNLOCK(kevq);
+    KQD_WUNLOCK(kevq->kq);
+}

 static int
 kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, struct kevq **kevqp)
 {
@@ -2466,7 +2498,6 @@ kqdom_init(struct kqdom *kqd)
     veclist_init(&kqd->children, 0, M_KQUEUE);
     veclist_init(&kqd->kqd_activelist, 0, M_KQUEUE);
     veclist_init(&kqd->kqd_kevqs, 0, M_KQUEUE);
-    rw_init(&kqd->kqd_lock, "kqdom_lock");
 }

 static int
@@ -2480,11 +2511,13 @@ static void
 kqdom_insert(struct kqdom *kqd, struct kevq *kevq)
 {
     int err, sz;
+    struct kqueue *kq;
+
+    kq = kevq->kq;
+    KQD_WOWNED(kq);
     KASSERT(kqdom_is_leaf(kqd), ("inserting into a non-leaf kqdom"));
     CTR2(KTR_KQ, "kqdom_insert: kevq: %p kqdom %d", kevq, kqd->id);
-    KQD_WLOCK(kqd);
     err = veclist_insert_tail(&kqd->kqd_kevqs, kevq);
     /* XXX: this is a hack, need to handle ENOMEM */
     if (err) {
@@ -2492,10 +2525,18 @@ kqdom_insert(struct kqdom *kqd, struct kevq *kevq)
     }
     sz = veclist_size(&kqd->kqd_kevqs);
-    KQD_WUNLOCK(kqd);
-    if (sz == 1)
+    if (sz == 1) {
         kqdom_update_parents(kqd, KQDIR_ACTIVE);
+    }
+}
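+
+/* Unlocked-entry ("_ul") wrapper: takes the kqueue-wide kqdom write lock
+ * around kqdom_insert() for callers that do not already hold it. */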
+static void
+kqdom_insert_ul(struct kqdom *kqd, struct kevq *kevq)
+{
+    KQD_WLOCK(kevq->kq);
+    kqdom_insert(kqd, kevq);
+    KQD_WUNLOCK(kevq->kq);
 }

 /* removes a kevq from a leaf kqdom */
@@ -2503,17 +2544,27 @@ static void
 kqdom_remove(struct kqdom *kqd, struct kevq *kevq)
 {
     int sz;
+    struct kqueue *kq;
+
+    kq = kevq->kq;
+    KQD_WOWNED(kq);
     KASSERT(kqdom_is_leaf(kqd), ("removing from a non-leaf kqdom"));
     CTR2(KTR_KQ, "kqdom_remove: kevq: %p kqdom %d", kevq, kqd->id);
-    KQD_WLOCK(kqd);
     veclist_remove(&kqd->kqd_kevqs, kevq);
     sz = veclist_size(&kqd->kqd_kevqs);
-    KQD_WUNLOCK(kqd);
-    if (sz == 0)
+    if (sz == 0) {
         kqdom_update_parents(kqd, KQDIR_INACTIVE);
+    }
+}
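+
+/* Unlocked-entry ("_ul") wrapper: takes the kqueue-wide kqdom write lock
+ * around kqdom_remove() for callers that do not already hold it. */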
+static void
+kqdom_remove_ul(struct kqdom *kqd, struct kevq *kevq)
+{
+    KQD_WLOCK(kevq->kq);
+    kqdom_remove(kqd, kevq);
+    KQD_WUNLOCK(kevq->kq);
 }

 static void
@@ -2529,8 +2580,6 @@ kqdom_destroy(struct kqdom *root)
     veclist_destroy(&root->kqd_activelist);
     veclist_destroy(&root->children);
-    rw_destroy(&root->kqd_lock);
     free(root, M_KQUEUE);
 }
@@ -2573,7 +2622,7 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
 static void
 kqdom_dump(struct sbuf *buf, struct kqdom *kqd, int level)
 {
     /* XXX: No potential race between this and kqdom_build() for now.
      * If we move kqdom_build() out of kqueue() syscall then there is a potential race */
     sbuf_printf(buf, "%*c<kqdom id=\"%d\" level=\"%d\" cpu_mask=\"0x%lx\" num_children=\"%d\" num_active=\"%d\" leaf=\"%d\" num_kevq=\"%d\">\n", level * DUMP_INDENT, ' ',
@@ -2586,12 +2635,10 @@ kqdom_dump(struct sbuf *buf, struct kqdom *kqd, int level)
         veclist_size(&kqd->kqd_kevqs));

     if (kqdom_is_leaf(kqd)) {
-        KQD_RLOCK(kqd);
         /* print all kevqs */
         for (int i = 0; i < veclist_size(&kqd->kqd_kevqs); i++) {
             kevq_dump(buf, veclist_at(&kqd->kqd_kevqs, i), level + 1);
         }
-        KQD_RUNLOCK(kqd);
     } else {
         for (int i = 0; i < veclist_size(&kqd->children); i++) {
             kqdom_dump(buf, veclist_at(&kqd->children, i), level + 1);
@@ -2623,9 +2670,7 @@ kqdom_update_parents(struct kqdom *kqd, int direction)
         if (kqd == NULL)
             break;
-        KQD_WLOCK(kqd);
         CTR3(KTR_KQ, "kqdom_update_parents: %d updating kqdom %d with %d active children", direction, kqd->id, veclist_size(&kqd->kqd_activelist));

         if (direction == KQDIR_INACTIVE) {
@@ -2651,7 +2696,6 @@ kqdom_update_parents(struct kqdom *kqd, int direction)
                 cont = 0;
             }
         }
-        KQD_WUNLOCK(kqd);
     }
 }
@@ -2983,7 +3027,6 @@ kevq_activate(struct kevq *kevq, struct thread *td)
     CTR1(KTR_KQ, "kevq_activate: kevq %p", kevq);
     KEVQ_LOCK(kevq);
     kevq->kevq_state |= KEVQ_ACTIVE;
     KEVQ_UNLOCK(kevq);
@@ -3001,10 +3044,13 @@ kevq_activate(struct kevq *kevq, struct thread *td)
         /* assign to the proper kqdom */
         KASSERT(kq->kq_kqd != NULL, ("kqdom doesn't exist after referencing kq"));
+        KQD_WLOCK(kq);
+        KEVQ_LOCK(kevq);
         kqd = kqdom_find(kq->kq_kqd, td->td_oncpu);
         kevq->kevq_kqd = kqd;
         kqdom_insert(kqd, kevq);
+        KEVQ_UNLOCK(kevq);
+        KQD_WUNLOCK(kq);
         }
     }
 }
@@ -3087,7 +3133,13 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
     if ((kevq->kevq_state & KEVQ_ACTIVE) == 0) {
         /* activate kq if not already activated */
         kevq_activate(kevq, td);
+    } else {
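+        /* already active: re-home the kevq if its thread migrated CPUs */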
+        if ((kq->kq_flags & KQ_FLAG_MULTI) && (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS)) {
+            kevq_update_kqdom(kevq, td);
+        }
     }

     KEVQ_LOCK(kevq);
     /* release processing knotes first */
     kevq_rel_proc_kn(kevq);
@@ -3568,9 +3620,11 @@ kqueue_dump(struct kqueue *kq, struct sbuf *buf)
     /* dump kqdom if used */
     if (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS) {
+        KQD_RLOCK(kq);
         sbuf_printf(buf, "\n%*c<kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
         kqdom_dump(buf, kq->kq_kqd, 2);
         sbuf_printf(buf, "%*c</kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
+        KQD_RUNLOCK(kq);
     }
     sbuf_printf(buf, "\n</kq_dump>\n");
 }
@@ -3774,7 +3828,6 @@ kevq_drain(struct kevq *kevq, struct thread *td)
     struct kevqlist *kevq_list;
     CTR3(KTR_KQ, "kevq_drain for %p (refcnt = %d) with %d knotes", kevq, kevq->kevq_refcnt, kevq_total_knote(kevq));
     kq = kevq->kq;
-    kqd = kevq->kevq_kqd;

     KQ_NOTOWNED(kq);
     KEVQ_NOTOWNED(kevq);
@@ -3797,6 +3850,9 @@ kevq_drain(struct kevq *kevq, struct thread *td)
     KASSERT(kevq->kevq_refcnt == 1, ("other refs of kevq are out there!"));
     KEVQ_UNLOCK(kevq);

+    /* Read the kevq's kqdom only after the closing flag is set and no other
+     * references remain, so it can no longer change underneath us. */
+    kqd = kevq->kevq_kqd;

     /* remove the kevq from queues first */
     if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) {
         KQ_LOCK(kq);
@@ -3820,7 +3876,7 @@ kevq_drain(struct kevq *kevq, struct thread *td)
             KVLST_WUNLOCK(kq);

             if (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS) {
-                kqdom_remove(kqd, kevq);
+                kqdom_remove_ul(kqd, kevq);
             }
         }
     } else {
@@ -3830,7 +3886,7 @@ kevq_drain(struct kevq *kevq, struct thread *td)
     }

     KEVQ_LOCK(kevq);
-    /* release all knotes processed by this dude */
+    /* release all knotes processed by the kevq */
     kevq_rel_proc_kn(kevq);
     /* drain all knotes on the kevq */
     while ((kn = kevq_peek_knote(kevq)) != NULL) {
@@ -4014,6 +4070,7 @@ kqueue_destroy(struct kqueue *kq)
     /* XXX: move these guys to be destroyed earlier, like kqdom */
     rw_destroy(&kq->kevq_vlist_lk);
+    rw_destroy(&kq->kqd_lock);
     veclist_destroy(&kq->kevq_vlist);

     if (kq->kq_knhash != NULL)
@@ -4798,7 +4855,12 @@ knote_next_kevq(struct knote *kn)
 done_cq:
         KASSERT(kqdom_is_leaf(kqd), ("found kqdom not leaf"));
-        KQD_RLOCK(kqd);
+        KQD_RLOCK(kq);
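+        /* empty leaf kqdom: nothing to schedule to, bail out */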
+        if (veclist_size(&kqd->kqd_kevqs) == 0) {
+            next_kevq = NULL;
+            KQD_RUNLOCK(kq);
+            goto end_cq;
+        }

         /* pick a random kevq */
         next_kevq = kvlst_sel_kevq(&kqd->kqd_kevqs, 1, kqueue_random(&kn->kn_rand_seed), NULL, NULL);
@@ -4808,7 +4870,7 @@ knote_next_kevq(struct knote *kn)
         if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(other_kevq, next_kevq) > 0)) {
             next_kevq = other_kevq;
-            CTR1(KTR_KQ, "knote_next_kevq: [QUEUE%d] new selected kevq: %p", next_kevq);
+            CTR2(KTR_KQ, "knote_next_kevq: [QUEUE%d] new selected kevq: %p", sargs, next_kevq);
         }
     }
@@ -4818,15 +4880,15 @@ knote_next_kevq(struct knote *kn)
         if (sargs > 0) {
             KVLST_RUNLOCK(kq);
         }
-        KQD_RUNLOCK(kqd);
+        KQD_RUNLOCK(kq);

         if (kqd_mismatch && next_kevq != NULL) {
             next_kevq->kevq_tot_kqd_mismatch++;
         }
+end_cq:
         CTR3(KTR_KQ, "knote_next_kevq: [QUEUE/CPU%d] next kevq %p for kn %p", sargs, next_kevq, kn);
         break;

     case KQ_SCHED_BEST:
         KVLST_RLOCK(kq);
@@ -4834,7 +4896,7 @@ knote_next_kevq(struct knote *kn)
         next_kevq = kevq_lock_check_avail(next_kevq);
         KVLST_RUNLOCK(kq);

-        CTR3(KTR_KQ, "knote_next_kevq: [BOT%d] next kevq %p for kn %p", sargs, next_kevq, kn);
+        CTR3(KTR_KQ, "knote_next_kevq: [BEST%d] next kevq %p for kn %p", sargs, next_kevq, kn);
         break;

     default:
@@ -4871,7 +4933,6 @@ knote_next_kevq(struct knote *kn)
     return next_kevq;
 }

-// if no kevqs are available for queueing, returns NULL
 static void
 knote_sched(struct knote *kn)
 {

View File

@@ -105,7 +105,6 @@ struct kevq {
 struct kqdom {
     /* static */
     int id;
-    struct rwlock kqd_lock;
     struct kqdom *parent;
     cpuset_t cpu_mask;
     struct veclist children; /* child kqdoms */
@@ -160,6 +159,7 @@ struct kqueue {
     struct veclist kevq_vlist;

     /* CPU queue */
+    struct rwlock kqd_lock;
     struct kqdom *kq_kqd; /* root domain */
 };