random kevq changed to rw lock

This commit is contained in:
Oscar Zhao 2019-08-26 17:54:58 -04:00
parent 8e73e73197
commit f085e08d18
5 changed files with 263 additions and 187 deletions

View File

@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$");
#include <sys/veclist.h> #include <sys/veclist.h>
#include <sys/stdint.h> #include <sys/stdint.h>
#include <sys/libkern.h> #include <sys/libkern.h>
#include <sys/rwlock.h>
#ifdef KTRACE #ifdef KTRACE
#include <sys/ktrace.h> #include <sys/ktrace.h>
#endif #endif
@ -85,6 +86,8 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
#define KQDOM_FLAGS ((KQ_SCHED_CPU) | (KQ_SCHED_WS) | (KQ_SCHED_QUEUE))
#define KQLST_FLAGS (KQ_SCHED_BOT)
/* /*
* This lock is used if multiple kq locks are required. This possibly * This lock is used if multiple kq locks are required. This possibly
* should be made into a per proc lock. * should be made into a per proc lock.
@ -138,12 +141,6 @@ extern struct cpu_group *cpu_top;
} \ } \
} while(0) } while(0)
static inline int
need_track_latency(struct kqueue *kq)
{
return (kq->kq_flags & KQ_FLAG_MULTI) != 0 && (kq->kq_sched_flags & KQ_SCHED_BEST_OF_N) != 0;
}
static inline uint64_t static inline uint64_t
timespec_to_ns(struct timespec *spec) timespec_to_ns(struct timespec *spec)
{ {
@ -162,6 +159,7 @@ static void kevq_worksteal(struct kevq *kevq);
void kevq_drain(struct kevq *kevq, struct thread *td); void kevq_drain(struct kevq *kevq, struct thread *td);
static int kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, struct kevq **kevq); static int kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, struct kevq **kevq);
static void kqueue_ensure_kqdom(struct kqueue *kq);
static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyout(void *arg, struct kevent *kevp, int count);
static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count);
static int kqueue_register(struct kqueue *kq, struct kevq *kevq, static int kqueue_register(struct kqueue *kq, struct kevq *kevq,
@ -255,10 +253,9 @@ static int filt_user(struct knote *kn, long hint);
static void filt_usertouch(struct knote *kn, struct kevent *kev, static void filt_usertouch(struct knote *kn, struct kevent *kev,
u_long type); u_long type);
static int kq_sched_bon_count = 2; static int kq_sched_bot_count = 2;
SYSCTL_INT(_kern, OID_AUTO, kq_sched_bon_count, CTLFLAG_RWTUN, &kq_sched_bon_count, 0, "the number of kevqs to select the best one from"); SYSCTL_INT(_kern, OID_AUTO, kq_sched_bot_count, CTLFLAG_RWTUN, &kq_sched_bot_count, 0, "the number of kevqs to select the best one from");
/* TODO: make this a percentage? */
static int kq_sched_ws_count = 1; static int kq_sched_ws_count = 1;
SYSCTL_INT(_kern, OID_AUTO, kq_sched_ws_count, CTLFLAG_RWTUN, &kq_sched_ws_count, 0, "the number of kevqs to steal each time"); SYSCTL_INT(_kern, OID_AUTO, kq_sched_ws_count, CTLFLAG_RWTUN, &kq_sched_ws_count, 0, "the number of kevqs to steal each time");
@ -426,7 +423,6 @@ knote_enter_flux(struct knote *kn)
kn->kn_influx++; kn->kn_influx++;
} }
/* TODO: change *_ul functions to macros? */
static bool static bool
knote_leave_flux_ul(struct knote *kn) knote_leave_flux_ul(struct knote *kn)
{ {
@ -1145,13 +1141,16 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type)
int int
sys_kqueue(struct thread *td, struct kqueue_args *uap) sys_kqueue(struct thread *td, struct kqueue_args *uap)
{ {
return (kern_kqueue(td, 0, NULL)); return (kern_kqueue(td, 0, NULL));
} }
static void static void
kqueue_init(struct kqueue *kq) kqueue_init(struct kqueue *kq)
{ {
/* XXX: move these guys to init later, just like kqdom */
veclist_init(&kq->sched_bot_lst, 0, M_KQUEUE);
rw_init(&kq->sched_bot_lk, "kqueue_sched_bot_lk");
mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK);
knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
@ -1434,9 +1433,9 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
KEVQ_UNLOCK(kevq); KEVQ_UNLOCK(kevq);
} }
if (need_track_latency(kq)) if (kq->kq_sched_flags & KQ_SCHED_BOT)
{ {
/* only need to do track the average latency for BON */ /* only need to do track the average latency for BOT */
KEVQ_LOCK(kevq); KEVQ_LOCK(kevq);
/* prob don't need the lock here as these are only accessible by one thread */ /* prob don't need the lock here as these are only accessible by one thread */
@ -1460,7 +1459,7 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
timespecclear(&kevq->kevq_last_kev); timespecclear(&kevq->kevq_last_kev);
kevq->kevq_last_nkev = 0; kevq->kevq_last_nkev = 0;
kqdom_update_lat(kevq->kevq_kqd, avg); //kqdom_update_lat(kevq->kevq_kqd, avg);
} }
KEVQ_UNLOCK(kevq); KEVQ_UNLOCK(kevq);
} }
@ -1989,11 +1988,12 @@ kevq_acquire(struct kevq *kevq, int locked)
static int static int
kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp) kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp)
{ {
void* to_free; void *to_free;
struct kevq_thred *kevq_th; struct kevq_thred *kevq_th;
struct kevq *kevq, *alloc_kevq; struct kevq *kevq, *alloc_kevq;
struct kevqlist *kevq_list; struct kevqlist *kevq_list;
struct kqdom *kqd; struct kqdom *kqd;
int err;
kevq = NULL; kevq = NULL;
to_free = NULL; to_free = NULL;
@ -2006,23 +2006,26 @@ kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp)
} }
if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) { if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) {
// allocate KEVQ_TH
if (td->td_kevq_thred == NULL) { if (td->td_kevq_thred == NULL) {
/* allocate kevq_thred for each thread */
kevq_th = malloc(sizeof(struct kevq_thred), M_KQUEUE, M_WAITOK | M_ZERO); kevq_th = malloc(sizeof(struct kevq_thred), M_KQUEUE, M_WAITOK | M_ZERO);
kevq_thred_init(kevq_th); kevq_thred_init(kevq_th);
kevq_th->kevq_hash = hashinit_flags(KEVQ_HASHSIZE, M_KQUEUE, &kevq_th->kevq_hashmask , HASH_WAITOK); kevq_th->kevq_hash = hashinit_flags(KEVQ_HASHSIZE, M_KQUEUE, &kevq_th->kevq_hashmask, HASH_WAITOK);
thread_lock(td); thread_lock(td);
if (td->td_kevq_thred == NULL) { if (td->td_kevq_thred == NULL) {
td->td_kevq_thred = kevq_th; td->td_kevq_thred = kevq_th;
CTR2(KTR_KQ, "kqueue_ensure_kevq(M): allocated kevq_th %p for thread %d", kevq_th, td->td_tid); CTR2(KTR_KQ, "kqueue_ensure_kevq(M): allocated kevq_th %p for thread %d", kevq_th, td->td_tid);
} else { } else {
to_free = kevq_th; to_free = kevq_th;
kevq_th = td->td_kevq_thred; kevq_th = td->td_kevq_thred;
} }
thread_unlock(td); thread_unlock(td);
if (to_free != NULL) { if (to_free != NULL) {
free(((struct kevq_thred*)to_free)->kevq_hash, M_KQUEUE); free(((struct kevq_thred *)to_free)->kevq_hash, M_KQUEUE);
free(to_free, M_KQUEUE); free(to_free, M_KQUEUE);
} }
} else { } else {
@ -2036,51 +2039,76 @@ kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp)
kevq = kevqlist_find(kevq_list, kq); kevq = kevqlist_find(kevq_list, kq);
KEVQ_TH_UNLOCK(kevq_th); KEVQ_TH_UNLOCK(kevq_th);
// allocate kevq /* make sure sched structs are allocated */
kqueue_ensure_kqdom(kq);
if (kevq == NULL) { if (kevq == NULL) {
/* allocate kevq */
to_free = NULL; to_free = NULL;
alloc_kevq = malloc(sizeof(struct kevq), M_KQUEUE, M_WAITOK | M_ZERO); alloc_kevq = malloc(sizeof(struct kevq), M_KQUEUE, M_WAITOK | M_ZERO);
kevq_init(alloc_kevq); kevq_init(alloc_kevq);
alloc_kevq->kq = kq; alloc_kevq->kq = kq;
alloc_kevq->kevq_th = kevq_th; alloc_kevq->kevq_th = kevq_th;
// assign the proper kqdomain CTR3(KTR_KQ, "kqueue_ensure_kevq(M): allocated kevq %p for thread %d (oncpu = %d)", alloc_kevq, td->td_tid, td->td_oncpu);
KASSERT(kq->kq_kqd != NULL, ("kqdom doesn't exist after referecing kq"));
kqd = kqdom_find(kq->kq_kqd, td->td_oncpu);
alloc_kevq->kevq_kqd = kqd;
CTR4(KTR_KQ, "kqueue_ensure_kevq(M): allocated kevq %p for thread %d (oncpu = %d), kqdom %d", alloc_kevq, td->td_tid, td->td_oncpu, kqd->id);
KQ_LOCK(kq); KQ_LOCK(kq);
KEVQ_TH_LOCK(kevq_th); KEVQ_TH_LOCK(kevq_th);
KQD_LOCK(kqd);
kevq = kevqlist_find(kevq_list, kq); kevq = kevqlist_find(kevq_list, kq);
/* TODO: probably don't need to re-check */ /* kevq should only be allocated by the current thread.
* This might only happen inside interrupt handler
* which I'm not actually sure about
* KASSERT(kevq != NULL, ("kevq double allocated"));
*/
if (kevq == NULL) { if (kevq == NULL) {
kevq = alloc_kevq; kevq = alloc_kevq;
// insert kevq to the kevq_th hash table /* insert kevq to the kevq_th hash table */
LIST_INSERT_HEAD(kevq_list, kevq, kevq_th_e); LIST_INSERT_HEAD(kevq_list, kevq, kevq_th_e);
// insert kevq to the kevq_th list, the list is used to drain kevq
/* insert kevq to the kevq_th list */
LIST_INSERT_HEAD(&kevq_th->kevq_list, kevq, kevq_th_tqe); LIST_INSERT_HEAD(&kevq_th->kevq_list, kevq, kevq_th_tqe);
/* insert into kqueue */
LIST_INSERT_HEAD(&kq->kq_kevqlist, kevq, kq_e); LIST_INSERT_HEAD(&kq->kq_kevqlist, kevq, kq_e);
/* insert into sched structures */
if (kq->kq_sched_flags & KQLST_FLAGS) {
rw_wlock(&kq->sched_bot_lk);
err = veclist_insert_tail(&kq->sched_bot_lst, kevq);
/* XXX: this is a hack, handle ENOMEM */
if (err) {
panic("sched_bot_lst insert tail failed");
}
rw_wunlock(&kq->sched_bot_lk);
}
if (kq->kq_sched_flags & KQDOM_FLAGS) {
/* assign to the proper kqdom */
KASSERT(kq->kq_kqd != NULL, ("kqdom doesn't exist after referecing kq"));
kqd = kqdom_find(kq->kq_kqd, td->td_oncpu);
alloc_kevq->kevq_kqd = kqd;
KQD_LOCK(kqd);
kqdom_insert(kqd, kevq);
KQD_UNLOCK(kqd);
}
KEVQ_TH_UNLOCK(kevq_th); KEVQ_TH_UNLOCK(kevq_th);
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
kqdom_insert(kqd, kevq);
KQD_UNLOCK(kqd);
} else { } else {
to_free = alloc_kevq; to_free = alloc_kevq;
KQD_UNLOCK(kqd);
KEVQ_TH_UNLOCK(kevq_th); KEVQ_TH_UNLOCK(kevq_th);
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
} }
if (to_free != NULL) { if (to_free != NULL) {
free(to_free, M_KQUEUE); free(to_free, M_KQUEUE);
} }
@ -2114,22 +2142,23 @@ kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp)
return 0; return 0;
} }
static void static void
kqueue_check_kqdom(struct kqueue *kq) kqueue_ensure_kqdom(struct kqueue *kq)
{ {
struct kqdom* kqd; struct kqdom* kqd;
if (((kq->kq_flags & KQ_FLAG_MULTI) != 0) && (kq->kq_kqd == NULL)) { KQ_NOTOWNED(kq);
kqd = kqdom_build();
KQ_LOCK(kq);
if (kq->kq_kqd == NULL) {
kq->kq_kqd = kqd;
kqd = NULL;
}
KQ_UNLOCK(kq);
if (kqd != NULL) { kqd = kqdom_build();
kqdom_destroy(kqd);
} KQ_LOCK(kq);
if (kq->kq_kqd == NULL) {
kq->kq_kqd = kqd;
kqd = NULL;
}
KQ_UNLOCK(kq);
if (kqd != NULL) {
kqdom_destroy(kqd);
} }
} }
@ -2157,9 +2186,6 @@ kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, str
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
} }
/* allocate kqdoms if not present */
kqueue_check_kqdom(kq);
error = kqueue_obtain_kevq(kq, td, &kevq); error = kqueue_obtain_kevq(kq, td, &kevq);
if (error == 0) { if (error == 0) {
@ -2240,9 +2266,9 @@ kqdom_next_leaf(struct kqdom *kqd)
static void static void
kqdom_init(struct kqdom *kqd) kqdom_init(struct kqdom *kqd)
{ {
veclist_init(&kqd->children, NULL, 0); veclist_init(&kqd->children, 0, M_KQUEUE);
veclist_init(&kqd->kqd_activelist, NULL, 0); veclist_init(&kqd->kqd_activelist, 0, M_KQUEUE);
veclist_init(&kqd->kqd_kevqs, NULL, 0); veclist_init(&kqd->kqd_kevqs, 0, M_KQUEUE);
mtx_init(&kqd->kqd_lock, "kqdom_lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&kqd->kqd_lock, "kqdom_lock", NULL, MTX_DEF | MTX_DUPOK);
} }
@ -2256,42 +2282,17 @@ kqdom_is_leaf(struct kqdom *kqd)
static void static void
kqdom_insert(struct kqdom *kqd, struct kevq *kevq) kqdom_insert(struct kqdom *kqd, struct kevq *kevq)
{ {
int oldcap, newcap; int err;
void **expand;
KQD_OWNED(kqd); KQD_OWNED(kqd);
KASSERT(kqdom_is_leaf(kqd), ("inserting into a non-leaf kqdom")); KASSERT(kqdom_is_leaf(kqd), ("inserting into a non-leaf kqdom"));
CTR2(KTR_KQ, "kqdom_insert: kevq: %p kqdom %d", kevq, kqd->id); CTR2(KTR_KQ, "kqdom_insert: kevq: %p kqdom %d", kevq, kqd->id);
/* expand the kqdom if needed */ err = veclist_insert_tail(&kqd->kqd_kevqs, kevq);
retry: /* XXX: this is a hack, need to handle ENOMEM */
if (veclist_need_exp(&kqd->kqd_kevqs)) { if (err) {
CTR2(KTR_KQ, "kqdom_insert: expanding... kqd %d for kevq %p\n", kqd->id, kevq); panic("kqdom veclist failed to insert tail");
oldcap = veclist_cap(&kqd->kqd_kevqs);
KQD_UNLOCK(kqd);
newcap = oldcap + KQDOM_EXTENT;
expand = malloc(sizeof(struct kqdom *) * newcap, M_KQUEUE, M_WAITOK | M_ZERO);
KQD_LOCK(kqd);
/* recheck if we need expansion, make sure old capacity didn't change */
if (veclist_cap(&kqd->kqd_kevqs) == oldcap) {
expand = veclist_expand(&kqd->kqd_kevqs, expand, newcap);
if (expand != NULL) {
free(expand, M_KQUEUE);
}
} else {
/* some threads made changes while we were allocating memory, retry */
free(expand, M_KQUEUE);
goto retry;
}
} }
KQD_OWNED(kqd);
KASSERT(!veclist_need_exp(&kqd->kqd_kevqs), ("failed to expand kqdom"));
veclist_insert_tail(&kqd->kqd_kevqs, kevq);
if (veclist_size(&kqd->kqd_kevqs) == 1) { if (veclist_size(&kqd->kqd_kevqs) == 1) {
kqdom_update_parents(kqd, KQDIR_ACTIVE); kqdom_update_parents(kqd, KQDIR_ACTIVE);
} }
@ -2308,40 +2309,29 @@ kqdom_remove(struct kqdom *kqd, struct kevq *kevq)
veclist_remove(&kqd->kqd_kevqs, kevq); veclist_remove(&kqd->kqd_kevqs, kevq);
if (veclist_size(&kqd->kqd_kevqs) == 0) { if (veclist_size(&kqd->kqd_kevqs) == 0) {
kqdom_update_parents(kqd, KQDIR_INACTIVE); kqdom_update_parents(kqd, KQDIR_INACTIVE);
} }
} }
static void static void
kqdom_destroy(struct kqdom *root) kqdom_destroy(struct kqdom *root)
{ {
void **buf;
for(int i = 0; i < veclist_size(&root->children); i++) { for(int i = 0; i < veclist_size(&root->children); i++) {
kqdom_destroy(veclist_at(&root->children, i)); kqdom_destroy(veclist_at(&root->children, i));
} }
CTR2(KTR_KQ, "kqdom_destroy: destroyed kqdom %d with %d child kqdoms", root->id, veclist_size(&root->children)); CTR2(KTR_KQ, "kqdom_destroy: destroyed kqdom %d with %d child kqdoms", root->id, veclist_size(&root->children));
buf = veclist_buf(&root->kqd_kevqs); veclist_destroy(&root->kqd_kevqs);
if (buf != NULL) { veclist_destroy(&root->kqd_activelist);
free(buf, M_KQUEUE); veclist_destroy(&root->children);
}
buf = veclist_buf(&root->kqd_activelist);
if (buf != NULL) {
free(buf, M_KQUEUE);
}
buf = veclist_buf(&root->children);
if (buf != NULL) {
free(buf, M_KQUEUE);
}
mtx_destroy(&root->kqd_lock); mtx_destroy(&root->kqd_lock);
free(root, M_KQUEUE); free(root, M_KQUEUE);
} }
/* Expensive if called *frequently* /* Expensive if called *frequently*
* *
* Updates a kqdom based on the currently active children * Updates a kqdom based on the currently active children
@ -2349,6 +2339,7 @@ kqdom_destroy(struct kqdom *root)
static void static void
kqdom_update_parents(struct kqdom *kqd, int direction) kqdom_update_parents(struct kqdom *kqd, int direction)
{ {
int err;
int cont; int cont;
struct kqdom *child; struct kqdom *child;
@ -2380,9 +2371,12 @@ kqdom_update_parents(struct kqdom *kqd, int direction)
/* kqd->kqd_activelist are preallocated with maximum children for non-leaf nodes /* kqd->kqd_activelist are preallocated with maximum children for non-leaf nodes
* Should NEVER fail * Should NEVER fail
*/ */
err = veclist_insert_tail(&kqd->kqd_activelist, child);
KASSERT(!veclist_need_exp(&kqd->kqd_activelist), ("kqdom requires expansion")); /* NOT a hack! */
veclist_insert_tail(&kqd->kqd_activelist, child); if (err) {
panic("kqdom activelist requires expansion");
}
/* KASSERT(!err, ("kqdom activelist requires expansion")); */
/* didn't change from 0 to 1, stop */ /* didn't change from 0 to 1, stop */
if (veclist_size(&kqd->kqd_activelist) != 1) { if (veclist_size(&kqd->kqd_activelist) != 1) {
@ -2396,6 +2390,9 @@ kqdom_update_parents(struct kqdom *kqd, int direction)
static void static void
kqdom_update_lat(struct kqdom *leaf, unsigned long avg) kqdom_update_lat(struct kqdom *leaf, unsigned long avg)
{ {
/* We don't need this function for now */
KASSERT(0, ("kqdom_update_lat called"));
while(leaf != NULL) { while(leaf != NULL) {
if (leaf->avg_lat != 0) { if (leaf->avg_lat != 0) {
// bit rot race here? // bit rot race here?
@ -2410,11 +2407,12 @@ kqdom_update_lat(struct kqdom *leaf, unsigned long avg)
} }
} }
/* Mirror the cpu_group structure */ /* Mirror the cpu_group structure */
static void static void
kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_id) kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_id)
{ {
void **expand; int err;
struct kqdom *child; struct kqdom *child;
int cg_numchild = cg_cur->cg_children; int cg_numchild = cg_cur->cg_children;
CTR4(KTR_KQ, "kqdom_build_internal: processing cpu_group with %d child groups, %d CPUs, shared cache level %d, kqd_id %d", cg_numchild, cg_cur->cg_count, cg_cur->cg_level, *kqd_id); CTR4(KTR_KQ, "kqdom_build_internal: processing cpu_group with %d child groups, %d CPUs, shared cache level %d, kqd_id %d", cg_numchild, cg_cur->cg_count, cg_cur->cg_level, *kqd_id);
@ -2426,11 +2424,16 @@ kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_i
/* allocate children and active lists */ /* allocate children and active lists */
if (cg_numchild > 0) { if (cg_numchild > 0) {
expand = malloc(sizeof(struct kqdom *) * cg_numchild, M_KQUEUE, M_WAITOK | M_ZERO); err = veclist_expand(&kqd_cur->children, cg_numchild);
veclist_expand(&kqd_cur->children, expand, cg_numchild); /* XXX: These are hacks */
if (err) {
expand = malloc(sizeof(struct kqdom *) * cg_numchild, M_KQUEUE, M_WAITOK | M_ZERO); panic("kqdom build veclist expand");
veclist_expand(&kqd_cur->kqd_activelist, expand, cg_numchild); }
err = veclist_expand(&kqd_cur->kqd_activelist, cg_numchild);
if (err) {
panic("kqdom build veclist expand");
}
} }
for (int i = 0; i < cg_numchild; i++) { for (int i = 0; i < cg_numchild; i++) {
@ -2438,8 +2441,14 @@ kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_i
kqdom_init(child); kqdom_init(child);
child->parent = kqd_cur; child->parent = kqd_cur;
err = veclist_insert_tail(&kqd_cur->children, child);
/* Not a hack! */
if (err) {
panic("kqdom build insert tail failed");
}
/* KASSERT(!err, ("kqdom build insert tail failed")); */
veclist_insert_tail(&kqd_cur->children, child);
kqdom_build_internal(child, &cg_cur->cg_child[i], kqd_id); kqdom_build_internal(child, &cg_cur->cg_child[i], kqd_id);
} }
} }
@ -2713,7 +2722,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
kevp = keva; kevp = keva;
CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events", td->td_tid, kevq, kevq->kn_count); CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events", td->td_tid, kevq, kevq->kn_count);
if ((kq->kq_flags & KQ_FLAG_MULTI) != 0 && (kq->kq_sched_flags & KQ_SCHED_WORK_STEALING) != 0 && kevq->kn_count == 0) { if ((kq->kq_sched_flags & KQ_SCHED_WS) && kevq->kn_count == 0) {
/* try work stealing */ /* try work stealing */
kevq_worksteal(kevq); kevq_worksteal(kevq);
} }
@ -2905,7 +2914,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
} }
if (nkev != 0 && need_track_latency(kq)) { if (nkev != 0 && (kq->kq_sched_flags & KQ_SCHED_BOT)) {
/* book keep the statistics */ /* book keep the statistics */
getnanouptime(&kevq->kevq_last_kev); getnanouptime(&kevq->kevq_last_kev);
kevq->kevq_last_nkev = nkev; kevq->kevq_last_nkev = nkev;
@ -3148,25 +3157,35 @@ kevq_drain(struct kevq *kevq, struct thread *td)
if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) { if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) {
KQ_LOCK(kq); KQ_LOCK(kq);
KEVQ_TH_LOCK(kevq->kevq_th); KEVQ_TH_LOCK(kevq->kevq_th);
KQD_LOCK(kqd);
// detach from kevq_th /* detach from kevq_th */
LIST_REMOVE(kevq, kevq_th_tqe); LIST_REMOVE(kevq, kevq_th_tqe);
kevq_list = &kevq->kevq_th->kevq_hash[KEVQ_HASH((unsigned long long)kq, kevq->kevq_th->kevq_hashmask)]; kevq_list = &kevq->kevq_th->kevq_hash[KEVQ_HASH((unsigned long long)kq, kevq->kevq_th->kevq_hashmask)];
LIST_REMOVE(kevq, kevq_th_e); LIST_REMOVE(kevq, kevq_th_e);
// detach from kqdom /* detach from kqueue */
kqdom_remove(kqd, kevq);
// detach from kqueue
if (kq->kq_ckevq == kevq) { if (kq->kq_ckevq == kevq) {
kq->kq_ckevq = LIST_NEXT(kevq, kq_e); kq->kq_ckevq = LIST_NEXT(kevq, kq_e);
} }
LIST_REMOVE(kevq, kq_e); LIST_REMOVE(kevq, kq_e);
KQD_UNLOCK(kqd);
/* detach from sched structs */
if (kq->kq_sched_flags & KQDOM_FLAGS) {
KQD_LOCK(kqd);
kqdom_remove(kqd, kevq);
KQD_UNLOCK(kqd);
}
if (kq->kq_sched_flags & KQLST_FLAGS) {
rw_wlock(&kq->sched_bot_lk);
veclist_remove(&kq->sched_bot_lst, kevq);
rw_wunlock(&kq->sched_bot_lk);
}
KEVQ_TH_UNLOCK(kevq->kevq_th); KEVQ_TH_UNLOCK(kevq->kevq_th);
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
} else { } else {
KQ_LOCK(kq); KQ_LOCK(kq);
kq->kq_kevq = NULL; kq->kq_kevq = NULL;
@ -3242,7 +3261,6 @@ kqueue_drain(struct kqueue *kq, struct kevq *kevq, struct thread *td)
} }
} }
// destroy kqdoms and kevqs
if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) { if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) {
while((kevq = LIST_FIRST(&kq->kq_kevqlist)) != NULL) { while((kevq = LIST_FIRST(&kq->kq_kevqlist)) != NULL) {
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
@ -3252,7 +3270,11 @@ kqueue_drain(struct kqueue *kq, struct kevq *kevq, struct thread *td)
} }
KQ_OWNED(kq); KQ_OWNED(kq);
kqdom_destroy(kq->kq_kqd);
/* destroy sched structs */
if (kq->kq_sched_flags & KQDOM_FLAGS) {
kqdom_destroy(kq->kq_kqd);
}
} else { } else {
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
// we already have a reference for single threaded mode // we already have a reference for single threaded mode
@ -3286,6 +3308,10 @@ kqueue_destroy(struct kqueue *kq)
knlist_destroy(&kq->kq_sel.si_note); knlist_destroy(&kq->kq_sel.si_note);
mtx_destroy(&kq->kq_lock); mtx_destroy(&kq->kq_lock);
/* XXX: move these guys to be destroyed earlier, like kqdom */
rw_destroy(&kq->sched_bot_lk);
veclist_destroy(&kq->sched_bot_lst);
if (kq->kq_knhash != NULL) if (kq->kq_knhash != NULL)
free(kq->kq_knhash, M_KQUEUE); free(kq->kq_knhash, M_KQUEUE);
if (kq->kq_knlist != NULL) if (kq->kq_knlist != NULL)
@ -3892,7 +3918,26 @@ knote_drop_detached(struct knote *kn, struct thread *td)
knote_free(kn); knote_free(kn);
} }
/* A refcnt to kevq will be held upon return */ static struct kevq *
kqbot_random_kevq_locked(struct kqueue *kq)
{
int sz;
struct kevq *kevq = NULL;
u_long rand = random();
rw_rlock(&kq->sched_bot_lk);
sz = veclist_size(&kq->sched_bot_lst);
if (sz > 0) {
kevq = veclist_at(&kq->sched_bot_lst, rand % veclist_size(&kq->sched_bot_lst));
}
KEVQ_LOCK(kevq);
if (!KEVQ_AVAIL(kevq)) {
KEVQ_UNLOCK(kevq);
kevq = NULL;
}
rw_runlock(&kq->sched_bot_lk);
return kevq;
}
static struct kevq * static struct kevq *
kqdom_random_kevq_locked(struct kqdom *kqd) kqdom_random_kevq_locked(struct kqdom *kqd)
{ {
@ -3985,11 +4030,9 @@ knote_next_kevq(struct knote *kn)
return next_kevq; return next_kevq;
} }
if ((kq->kq_sched_flags & KQ_SCHED_BEST_OF_N) != 0) { if (kq->kq_sched_flags & KQ_SCHED_BOT) {
kqd = kq->kq_kqd; for(int i = 0; i < kq_sched_bot_count; i++) {
struct kevq *sel_kevq = kqbot_random_kevq_locked(kq);
for(int i = 0; i < kq_sched_bon_count; i++) {
struct kevq *sel_kevq = kqdom_random_kevq_locked(kqd);
if (sel_kevq != NULL) { if (sel_kevq != NULL) {
int ret; int ret;
@ -4017,7 +4060,7 @@ knote_next_kevq(struct knote *kn)
} }
} }
CTR3(KTR_KQ, "knote_next_kevq: [BON] current best kevq %p, avg time: %d, wait time: %d", next_kevq, next_kevq->kevq_avg_lat, next_kevq->kevq_avg_lat * next_kevq->kn_count); CTR3(KTR_KQ, "knote_next_kevq: [BOT] current best kevq %p, avg time: %d, wait time: %d", next_kevq, next_kevq->kevq_avg_lat, next_kevq->kevq_avg_lat * next_kevq->kn_count);
} }
} }
@ -4029,11 +4072,11 @@ knote_next_kevq(struct knote *kn)
*/ */
} }
CTR2(KTR_KQ, "knote_next_kevq: [BON] next kevq %p for kn %p", next_kevq, kn); CTR2(KTR_KQ, "knote_next_kevq: [BOT] next kevq %p for kn %p", next_kevq, kn);
} }
if ((next_kevq == NULL) && (kq->kq_sched_flags & KQ_SCHED_QUEUE) != 0) { if (next_kevq == NULL && (kq->kq_sched_flags & (KQ_SCHED_QUEUE | KQ_SCHED_CPU)) != 0 ) {
if((kq->kq_sched_flags & KQ_SCHED_QUEUE_CPU) != 0) { if((kq->kq_sched_flags & KQ_SCHED_CPU) != 0) {
kqd = kqdom_find(kq->kq_kqd, PCPU_GET(cpuid)); kqd = kqdom_find(kq->kq_kqd, PCPU_GET(cpuid));
} else { } else {
if (kn->kn_kqd == NULL) { if (kn->kn_kqd == NULL) {
@ -4051,7 +4094,7 @@ knote_next_kevq(struct knote *kn)
CTR2(KTR_KQ, "knote_next_kevq: [QUEUE] next kevq %p for kn %p", next_kevq, kn); CTR2(KTR_KQ, "knote_next_kevq: [QUEUE] next kevq %p for kn %p", next_kevq, kn);
} }
// fall-back round-robbin /* fall-back round-robbin */
if (next_kevq == NULL) { if (next_kevq == NULL) {
KQ_LOCK(kq); KQ_LOCK(kq);

View File

@ -399,11 +399,9 @@ __END_DECLS
/* /*
* KQ scheduler flags * KQ scheduler flags
*/ */
#define KQ_SCHED_WORK_STEALING 0x01 #define KQ_SCHED_WS 0x01
#define KQ_SCHED_QUEUE 0x02 /* make kq affinitize the knote depending on the first cpu it's scheduled to */ #define KQ_SCHED_QUEUE 0x02 /* make kq affinitize the knote depending on the first cpu it's scheduled to */
/* QUEUE_CPU = 0x04 | QUEUE */ #define KQ_SCHED_CPU 0x04 /* make kq affinitize the knote depending on the runtime cpu it's scheduled to */
#define KQ_SCHED_QUEUE_CPU 0x06 /* make kq affinitize the knote depending on the runtime cpu it's scheduled to */ #define KQ_SCHED_BOT 0x08
#define KQ_SCHED_BEST_OF_N 0x08
#endif /* !_SYS_EVENT_H_ */ #endif /* !_SYS_EVENT_H_ */

View File

@ -38,6 +38,9 @@
#include <sys/_task.h> #include <sys/_task.h>
#include <sys/veclist.h> #include <sys/veclist.h>
#include <sys/stdint.h> #include <sys/stdint.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#define KQ_NEVENTS 8 /* minimize copy{in,out} calls */ #define KQ_NEVENTS 8 /* minimize copy{in,out} calls */
#define KQEXTENT 256 /* linear growth by this amount */ #define KQEXTENT 256 /* linear growth by this amount */
@ -108,12 +111,17 @@ struct kqueue {
struct kevq *kq_kevq; /* the kevq for kq, always created, act as buffer queue in multithreaded mode */ struct kevq *kq_kevq; /* the kevq for kq, always created, act as buffer queue in multithreaded mode */
struct task kq_task; struct task kq_task;
struct ucred *kq_cred; struct ucred *kq_cred;
struct kevqlist kq_kevqlist; /* list of kevqs */
/* scheduling stuff */ /* scheduling stuff */
struct kevqlist kq_kevqlist; /* list of kevqs for fall-back round robbin */
struct kqdom *kq_kqd; /* root domain */
struct kevq *kq_ckevq; /* current kevq for multithreaded kqueue, used for round robbin */
int kq_sched_flags; /* Scheduler flag for the KQ */ int kq_sched_flags; /* Scheduler flag for the KQ */
/* Round robbin (only as a fall back) */
struct kevq *kq_ckevq; /* current kevq for multithreaded kqueue, used for round robbin */
/* Best of two */
struct rwlock sched_bot_lk;
struct veclist sched_bot_lst;
/* CPU queue */
struct kqdom *kq_kqd; /* root domain */
}; };
#endif /* !_SYS_EVENTVAR_H_ */ #endif /* !_SYS_EVENTVAR_H_ */

View File

@ -39,19 +39,45 @@
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/malloc.h>
#include <sys/errno.h>
struct veclist { struct veclist {
size_t cap; size_t cap;
size_t size; size_t size;
struct malloc_type *mtype;
void **buf; void **buf;
}; };
static inline void #define VECLIST_EXPAND_FACTOR (2)
veclist_init(struct veclist *lst, void **buf, int cap) #define VECLIST_INIT_SZ (8)
/* returns old buffer */
static inline int
veclist_expand(struct veclist *lst, size_t new_cap)
{ {
void **new_buf;
KASSERT(new_cap > lst->cap, ("veclist expand"));
new_buf = (void **)malloc(new_cap * sizeof(void*), lst->mtype, M_NOWAIT);
if (new_buf == NULL) {
return ENOMEM;
}
memcpy(new_buf, lst->buf, lst->size * sizeof(void*));
free(lst->buf, lst->mtype);
lst->buf = new_buf;
lst->cap = new_cap;
return 0;
}
static inline int
veclist_init(struct veclist *lst, size_t init_cap, struct malloc_type *mtype)
{
lst->cap = 0;
lst->buf = NULL;
lst->size = 0; lst->size = 0;
lst->buf = buf; lst->mtype = mtype;
lst->cap = cap;
return init_cap ? veclist_expand(lst, init_cap) : 0;
} }
static inline void * static inline void *
@ -65,6 +91,12 @@ veclist_remove_at(struct veclist *lst, size_t idx)
return ret; return ret;
} }
static inline void
veclist_destroy(struct veclist *lst)
{
free(lst->buf, lst->mtype);
}
static inline void * static inline void *
veclist_remove(struct veclist *lst, void *ele) veclist_remove(struct veclist *lst, void *ele)
{ {
@ -80,22 +112,31 @@ veclist_remove(struct veclist *lst, void *ele)
} }
/* inserts an element so that the index of the element after insertion is idx */ /* inserts an element so that the index of the element after insertion is idx */
static inline void static inline int
veclist_insert_at(struct veclist *lst, void *ele, size_t idx) veclist_insert_at(struct veclist *lst, void *ele, size_t idx)
{ {
KASSERT((lst->cap > lst->size) && (lst->size >= idx), ("veclist overflow")); int err;
KASSERT(idx <= lst->size, ("veclist idx overflow"));
if (lst->size == lst->cap) {
/* needs expansion */
err = veclist_expand(lst, lst->cap == 0 ? VECLIST_INIT_SZ : lst->cap * VECLIST_EXPAND_FACTOR);
if (err) {
return err;
}
}
memmove(&lst->buf[idx+1], &lst->buf[idx], (lst->size - idx) * sizeof(void*)); memmove(&lst->buf[idx+1], &lst->buf[idx], (lst->size - idx) * sizeof(void*));
lst->size++; lst->size++;
lst->buf[idx] = ele; lst->buf[idx] = ele;
return 0;
} }
static inline void static inline int
veclist_insert_tail(struct veclist *lst, void *ele) veclist_insert_tail(struct veclist *lst, void *ele)
{ {
return veclist_insert_at(lst, ele, lst->size); return veclist_insert_at(lst, ele, lst->size);
} }
static inline void static inline int
veclist_insert_head(struct veclist *lst, void *ele) veclist_insert_head(struct veclist *lst, void *ele)
{ {
return veclist_insert_at(lst, ele, 0); return veclist_insert_at(lst, ele, 0);
@ -113,43 +154,12 @@ veclist_remove_tail(struct veclist *lst)
return veclist_remove_at(lst, lst->size - 1); return veclist_remove_at(lst, lst->size - 1);
} }
/* returns old buffer */
static inline void**
veclist_expand(struct veclist *lst, void **new_buf, size_t new_cap)
{
void **ret;
KASSERT(new_cap > lst->cap, ("veclist expand"));
memcpy(new_buf, lst->buf, lst->size * sizeof(void*));
ret = lst->buf;
lst->buf = new_buf;
lst->cap = new_cap;
return ret;
}
static inline int
veclist_need_exp(struct veclist *lst)
{
return (lst->size == lst->cap);
}
static inline int
veclist_cap(struct veclist *lst)
{
return lst->cap;
}
static inline int static inline int
veclist_size(struct veclist *lst) veclist_size(struct veclist *lst)
{ {
return lst->size; return lst->size;
} }
static inline void *
veclist_buf(struct veclist *lst)
{
return lst->buf;
}
static inline void * static inline void *
veclist_at(struct veclist *lst, size_t idx) veclist_at(struct veclist *lst, size_t idx)
{ {

View File

@ -615,13 +615,17 @@ test_socket_brutal_worker(void* args)
} }
static void static void
test_socket_brutal() test_socket_brutal(char* name)
{ {
char id[256];
struct kevent kev; struct kevent kev;
const char *test_id = "[Multi]kevent(brutal)"; const char *test_id = "[Multi]kevent(brutal) - ";
test_begin(test_id); strcpy(id, test_id);
strcat(id, name);
test_begin(id);
for (int i = 0; i < SOCK_BRUTE_CNT; i++) { for (int i = 0; i < SOCK_BRUTE_CNT; i++) {
@ -719,7 +723,7 @@ test_evfilt_read_m()
} }
test_socket_read(0); test_socket_read(0);
test_socket_brutal(); test_socket_brutal("round robbin");
close(g_kqfd); close(g_kqfd);
@ -732,11 +736,24 @@ test_evfilt_read_m()
} }
//test_socket_queue(); //test_socket_queue();
test_socket_brutal(); test_socket_brutal("queue");
close(g_kqfd); close(g_kqfd);
flags = KQ_SCHED_WORK_STEALING;
flags = KQ_SCHED_CPU;
g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) {
err(1, "ioctl");
}
test_socket_brutal("cpu");
close(g_kqfd);
flags = KQ_SCHED_WS;
g_kqfd = kqueue(); g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags); error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) { if (error == -1) {
@ -744,17 +761,17 @@ test_evfilt_read_m()
} }
test_socket_ws(); test_socket_ws();
test_socket_brutal(); test_socket_brutal("work stealing");
close(g_kqfd); close(g_kqfd);
flags = KQ_SCHED_BEST_OF_N; flags = KQ_SCHED_BOT;
g_kqfd = kqueue(); g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags); error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) { if (error == -1) {
err(1, "ioctl"); err(1, "ioctl");
} }
test_socket_brutal(); test_socket_brutal("best of two");
test_socket_read(1); test_socket_read(1);