From 8e73e731977f7c387245ac2f87893478e10d443f Mon Sep 17 00:00:00 2001 From: Charlie Root Date: Fri, 23 Aug 2019 15:30:30 -0400 Subject: [PATCH] improve best of 2 (still slow) --- sys/kern/kern_event.c | 541 ++++++++++++++-------------- sys/sys/eventvar.h | 24 +- sys/sys/veclist.h | 161 +++++++++ tests/sys/kqueue/libkqueue/common.h | 1 + tests/sys/kqueue/libkqueue/main.c | 22 ++ tests/sys/kqueue/libkqueue/read_m.c | 88 ++--- 6 files changed, 500 insertions(+), 337 deletions(-) create mode 100644 sys/sys/veclist.h diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index ecba10432536..88b4d0303ef6 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -73,6 +73,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include +#include #ifdef KTRACE #include #endif @@ -135,6 +138,18 @@ extern struct cpu_group *cpu_top; } \ } while(0) +static inline int +need_track_latency(struct kqueue *kq) +{ + return (kq->kq_flags & KQ_FLAG_MULTI) != 0 && (kq->kq_sched_flags & KQ_SCHED_BEST_OF_N) != 0; +} + +static inline uint64_t +timespec_to_ns(struct timespec *spec) +{ + return spec->tv_nsec + spec->tv_sec * 1000000000; +} + static struct kevq * kevqlist_find(struct kevqlist *kevq_list, struct kqueue *kq); static void kevq_thred_init(struct kevq_thred *kevq_th); static void kevq_thred_destroy(struct kevq_thred *kevq_th); @@ -144,11 +159,9 @@ static void kevq_release(struct kevq* kevq, int locked); static void kevq_destroy(struct kevq *kevq); static int kevq_acquire(struct kevq *kevq, int locked); static void kevq_worksteal(struct kevq *kevq); -void kevq_drain(struct kevq *kevq); +void kevq_drain(struct kevq *kevq, struct thread *td); static int kqueue_acquire_kevq(struct file *fp, struct thread *td, struct kqueue **kqp, struct kevq **kevq); -static void knote_xinit(struct knote *kn); - static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kqueue_register(struct kqueue *kq, struct kevq *kevq, @@ -213,11 +226,11 @@ static void knote_sched(struct knote *kn); static void kqdom_init(struct kqdom *kqd); static void kqdom_update_lat(struct kqdom *leaf, unsigned long avg); -static void kqdom_update_active(struct kqdom *leaf, int change); +static void kqdom_update_parents(struct kqdom *leaf, int direction); static void kqdom_insert(struct kqdom *kqd, struct kevq *kevq); static void kqdom_remove(struct kqdom *kqd, struct kevq *kevq); static void kqdom_destroy(struct kqdom *root); -static struct kevq * kqdom_random_kevq_locked(struct kqdom *kqd, struct kevq *last_kevq); +static struct kevq * kqdom_random_kevq_locked(struct kqdom *kqd); static void kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_id); static struct kqdom * kqdom_build(void); static struct kqdom * kqdom_find(struct kqdom *root, int cpuid); @@ -249,19 +262,6 @@ SYSCTL_INT(_kern, OID_AUTO, kq_sched_bon_count, CTLFLAG_RWTUN, &kq_sched_bon_cou static int kq_sched_ws_count = 1; SYSCTL_INT(_kern, OID_AUTO, kq_sched_ws_count, CTLFLAG_RWTUN, &kq_sched_ws_count, 0, "the number of kevqs to steal each time"); -// hacky fast random generator -static unsigned int g_seed = 0x1234; -// Used to seed the generator. -static void kqueue_fsrand(int seed) { - g_seed = seed; -} -// Compute a pseudorandom integer. 
-// Output value in range [0, 32767] -static int kqueue_frand(void) { - g_seed = (214013 * g_seed + 2531011); - return (g_seed>>16) & 0x7FFF; -} - static struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, @@ -420,8 +420,8 @@ knote_enter_flux_ul(struct knote *kn) static void knote_enter_flux(struct knote *kn) { - CTR2(KTR_KQ, "knote_enter_flux: %p flux: %d", kn, kn->kn_influx); - KN_FLUX_OWNED(kn); + /* CTR2(KTR_KQ, "knote_enter_flux: %p flux: %d", kn, kn->kn_influx); */ + KN_FLUX_OWNED(kn); MPASS(kn->kn_influx < INT_MAX); kn->kn_influx++; } @@ -441,13 +441,13 @@ knote_leave_flux_ul(struct knote *kn) static bool knote_leave_flux(struct knote *kn) { - CTR2(KTR_KQ, "knote_leave_flux: %p flux: %d", kn, kn->kn_influx); + /* CTR2(KTR_KQ, "knote_leave_flux: %p flux: %d", kn, kn->kn_influx); */ KN_FLUX_OWNED(kn); MPASS(kn->kn_influx > 0); kn->kn_influx--; - return (kn->kn_influx == 0); - } + return (kn->kn_influx == 0); +} #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ @@ -1415,6 +1415,7 @@ kevqlist_find(struct kevqlist *kevq_list, struct kqueue *kq) return kevq_found; } + static int kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) @@ -1423,7 +1424,7 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan struct kevent *kevp, *changes; struct timespec cur_ts; int i, n, nerrors, error; - unsigned long avg; + uint64_t avg; if ((kq->kq_flags & KQ_FLAG_MULTI) == 0 && (kevq->kevq_state & KEVQ_RDY) == 0) { /* Mark the global kevq as ready for single threaded mode to close the window between @@ -1433,31 +1434,36 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan KEVQ_UNLOCK(kevq); } - KEVQ_LOCK(kevq); - /* prob don't need the lock here as these are only accessible by one thread */ - if (kevq->kevq_last_nkev != 0) + if (need_track_latency(kq)) { - /* make sure we actually processed events last time */ - getnanouptime(&cur_ts); - timespecsub(&cur_ts, &kevq->kevq_last_kev, &cur_ts); - - /* divide by the number of events processed */ - avg = (cur_ts.tv_sec * 1000000 + cur_ts.tv_nsec / 100) / kevq->kevq_last_nkev; + /* only need to track the average latency for BON */ + KEVQ_LOCK(kevq); - if (kevq->kevq_avg_lat != 0) { - kevq->kevq_avg_lat = CALC_OVERTIME_AVG(kevq->kevq_avg_lat, avg); - } else { - kevq->kevq_avg_lat = avg; + /* prob don't need the lock here as these are only accessible by one thread */ + if (kevq->kevq_last_nkev != 0) + { + /* make sure we actually processed events last time */ + getnanouptime(&cur_ts); + timespecsub(&cur_ts, &kevq->kevq_last_kev, &cur_ts); + + /* divide by the number of events processed */ + avg = timespec_to_ns(&cur_ts) / kevq->kevq_last_nkev; + + if (kevq->kevq_avg_lat != 0) { + kevq->kevq_avg_lat = CALC_OVERTIME_AVG(kevq->kevq_avg_lat, avg); + } else { + kevq->kevq_avg_lat = avg; + } + + CTR3(KTR_KQ, "kevent: td %d spent %ld ns per event on %d events", td->td_tid, avg, kevq->kevq_last_nkev); + /* clear parameters */ + timespecclear(&kevq->kevq_last_kev); + kevq->kevq_last_nkev = 0; + + kqdom_update_lat(kevq->kevq_kqd, avg); } - - CTR3(KTR_KQ, "kevent: td %d spent %ld us per event on %d events", td->td_tid, avg, kevq->kevq_last_nkev); - /* clear parameters */ - timespecclear(&kevq->kevq_last_kev); - kevq->kevq_last_nkev = 0; - - kqdom_update_lat(kevq->kevq_kqd, avg); + KEVQ_UNLOCK(kevq); } - KEVQ_UNLOCK(kevq); nerrors = 0; while (nchanges > 0) { @@ -1777,7 
+1783,6 @@ kqueue_register(struct kqueue *kq, struct kevq *kevq, struct kevent *kev, struct error = ENOMEM; goto done; } - knote_xinit(kn); kn->kn_kevq = kevq; // this is set later depending on the scheduled CPU kn->kn_kqd = NULL; @@ -1910,6 +1915,7 @@ kevq_thred_init(struct kevq_thred *kevq_th) { static void kevq_thred_destroy(struct kevq_thred *kevq_th) { + mtx_destroy(&kevq_th->lock); free(kevq_th->kevq_hash, M_KQUEUE); free(kevq_th, M_KQUEUE); CTR1(KTR_KQ, "kevq_thred_destroy: freed kevq_th %p", kevq_th); @@ -1926,7 +1932,7 @@ kevq_thred_drain(struct kevq_thred *kevq_th, struct thread* td) { if (kevq_acquire(kevq, 0) == 0) { CTR2(KTR_KQ, "kevq_thred_drain: draining kevq %p on kevq_th %p", kevq, kevq_th); KEVQ_TH_UNLOCK(kevq_th); - kevq_drain(kevq); + kevq_drain(kevq, td); KEVQ_TH_LOCK(kevq_th); } } @@ -1949,7 +1955,7 @@ kevq_release(struct kevq* kevq, int locked) KEVQ_OWNED(kevq); else KEVQ_LOCK(kevq); - CTR2(KTR_KQ, "releasing kevq %p (refcnt = %d)", kevq, kevq->kevq_refcnt); + /* CTR2(KTR_KQ, "releasing kevq %p (refcnt = %d)", kevq, kevq->kevq_refcnt); */ kevq->kevq_refcnt--; if (kevq->kevq_refcnt == 1) wakeup(&kevq->kevq_refcnt); @@ -1967,7 +1973,7 @@ kevq_acquire(struct kevq *kevq, int locked) KEVQ_LOCK(kevq); } error = 0; - CTR2(KTR_KQ, "referencing kevq %p (refcnt = %d)", kevq, kevq->kevq_refcnt); + /* CTR2(KTR_KQ, "referencing kevq %p (refcnt = %d)", kevq, kevq->kevq_refcnt); */ if ((kevq->kevq_state & KEVQ_CLOSING) == KEVQ_CLOSING) { error = EINVAL; } else { @@ -2046,11 +2052,11 @@ kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp) CTR4(KTR_KQ, "kqueue_ensure_kevq(M): allocated kevq %p for thread %d (oncpu = %d), kqdom %d", alloc_kevq, td->td_tid, td->td_oncpu, kqd->id); KQ_LOCK(kq); - KQD_LOCK(kqd); KEVQ_TH_LOCK(kevq_th); + KQD_LOCK(kqd); kevq = kevqlist_find(kevq_list, kq); - /* TODO: probably don't need to re-check unless a thread can asynchronously call - * kevent (signal handler?) */ + + /* TODO: probably don't need to re-check */ if (kevq == NULL) { kevq = alloc_kevq; // insert kevq to the kevq_th hash table @@ -2069,8 +2075,8 @@ kqueue_obtain_kevq(struct kqueue *kq, struct thread *td, struct kevq **kevqp) } else { to_free = alloc_kevq; - KEVQ_TH_UNLOCK(kevq_th); KQD_UNLOCK(kqd); + KEVQ_TH_UNLOCK(kevq_th); KQ_UNLOCK(kq); } @@ -2234,128 +2240,156 @@ kqdom_next_leaf(struct kqdom *kqd) static void kqdom_init(struct kqdom *kqd) { + veclist_init(&kqd->children, NULL, 0); + veclist_init(&kqd->kqd_activelist, NULL, 0); + veclist_init(&kqd->kqd_kevqs, NULL, 0); mtx_init(&kqd->kqd_lock, "kqdom_lock", NULL, MTX_DEF | MTX_DUPOK); } -/* inserts a list*/ +static int +kqdom_is_leaf(struct kqdom *kqd) +{ + return veclist_size(&kqd->children) == 0; +} + +/* inserts a kevq into a leaf kqdom */ static void kqdom_insert(struct kqdom *kqd, struct kevq *kevq) { - int oldcap; - struct kevq **expand; - KQD_OWNED(kqd); - KASSERT(kqd->num_children == 0, ("inserting into a non-leaf kqdom")); - CTR4(KTR_KQ, "kqdom_insert: kevq: %p kqd %d: cnt: %d cap: %d", kevq, kqd->id, kqd->kqd_kevqcnt, kqd->kqd_kevqcap); + int oldcap, newcap; + void **expand; + KQD_OWNED(kqd); + KASSERT(kqdom_is_leaf(kqd), ("inserting into a non-leaf kqdom")); + CTR2(KTR_KQ, "kqdom_insert: kevq: %p kqdom %d", kevq, kqd->id); + + /* expand the kqdom if needed */ retry: - if (kqd->kqd_kevqcnt + 1 > kqd->kqd_kevqcap) { + if (veclist_need_exp(&kqd->kqd_kevqs)) { CTR2(KTR_KQ, "kqdom_insert: expanding... 
kqd %d for kevq %p\n", kqd->id, kevq); - oldcap = kqd->kqd_kevqcap; + oldcap = veclist_cap(&kqd->kqd_kevqs); KQD_UNLOCK(kqd); - expand = malloc(sizeof(struct kqdom *) * (oldcap + KQDOM_EXTENT_FACTOR), M_KQUEUE, M_WAITOK | M_ZERO); + + newcap = oldcap + KQDOM_EXTENT; + expand = malloc(sizeof(struct kevq *) * newcap, M_KQUEUE, M_WAITOK | M_ZERO); KQD_LOCK(kqd); /* recheck if we need expansion, make sure old capacity didn't change */ - if (kqd->kqd_kevqcap == oldcap) { - /* copy the content from the old list to this */ - for(int i = 0; i < kqd->kqd_kevqcnt; i++) { - expand[i] = kqd->kqd_kevqlist[i]; + if (veclist_cap(&kqd->kqd_kevqs) == oldcap) { + expand = veclist_expand(&kqd->kqd_kevqs, expand, newcap); + if (expand != NULL) { + free(expand, M_KQUEUE); } - free(kqd->kqd_kevqlist, M_KQUEUE); - kqd->kqd_kevqlist = expand; - kqd->kqd_kevqcap = oldcap + KQDOM_EXTENT_FACTOR; } else { - /* some threads made changes while we allocated memory, retry */ + /* some threads made changes while we were allocating memory, retry */ free(expand, M_KQUEUE); goto retry; } } KQD_OWNED(kqd); - KASSERT(kqd->kqd_kevqcnt + 1 <= kqd->kqd_kevqcap, ("kqdom didn't expand properly")); + + KASSERT(!veclist_need_exp(&kqd->kqd_kevqs), ("failed to expand kqdom")); + veclist_insert_tail(&kqd->kqd_kevqs, kevq); - /* insert to list */ - kqd->kqd_kevqlist[kqd->kqd_kevqcnt] = kevq; - kqd->kqd_kevqcnt++; + if (veclist_size(&kqd->kqd_kevqs) == 1) { + kqdom_update_parents(kqd, KQDIR_ACTIVE); + } } -/* removes a list */ +/* removes a kevq from a leaf kqdom */ static void kqdom_remove(struct kqdom *kqd, struct kevq *kevq) { - int found; KQD_OWNED(kqd); - KASSERT(kqd->num_children == 0, ("removing from a non-leaf kqdom")); - CTR4(KTR_KQ, "kqdom_remove: kevq: %p kqd %d: cnt: %d cap: %d", kevq, kqd->id, kqd->kqd_kevqcnt, kqd->kqd_kevqcap); - found = 0; + KASSERT(kqdom_is_leaf(kqd), ("removing from a non-leaf kqdom")); + CTR2(KTR_KQ, "kqdom_remove: kevq: %p kqdom %d", kevq, kqd->id); - /* slow, but no need to optimize for delete */ - for(int i = 0; i < kqd->kqd_kevqcnt; i++) { - if(kqd->kqd_kevqlist[i] == kevq) { - found = 1; - } + veclist_remove(&kqd->kqd_kevqs, kevq); - if(found && (i+1 < kqd->kqd_kevqcnt)) { - kqd->kqd_kevqlist[i] = kqd->kqd_kevqlist[i+1]; - } + if (veclist_size(&kqd->kqd_kevqs) == 0) { + kqdom_update_parents(kqd, KQDIR_INACTIVE); } - - KASSERT(found, ("cannot find kevq from kqdom")); - - kqd->kqd_kevqcnt--; - kqd->kqd_kevqlist[kqd->kqd_kevqcnt] = NULL; - - if (kqd->kqd_kevqcnt != 0) - kqd->kqd_ckevq = kqd->kqd_ckevq % kqd->kqd_kevqcnt; - else - kqd->kqd_ckevq = 0; } static void kqdom_destroy(struct kqdom *root) { - for(int i = 0; i < root->num_children; i++) { - kqdom_destroy(root->children[i]); + void **buf; + for(int i = 0; i < veclist_size(&root->children); i++) { + kqdom_destroy(veclist_at(&root->children, i)); } - CTR2(KTR_KQ, "kqdom_destroy: destroyed kqdom %p with %d child kqdoms", root, root->num_children); + CTR2(KTR_KQ, "kqdom_destroy: destroyed kqdom %d with %d child kqdoms", root->id, veclist_size(&root->children)); - if (root->kqd_kevqlist != NULL) { - KASSERT(root->kqd_kevqcnt == 0, ("freeing a kqdom with kevqs")); - free(root->kqd_kevqlist, M_KQUEUE); + buf = veclist_buf(&root->kqd_kevqs); + if (buf != NULL) { + free(buf, M_KQUEUE); + } + + buf = veclist_buf(&root->kqd_activelist); + if (buf != NULL) { + free(buf, M_KQUEUE); } - if (root->children != NULL) { - free(root->children, M_KQUEUE); + buf = veclist_buf(&root->children); + if (buf != NULL) { + free(buf, M_KQUEUE); } - KASSERT(root->num_active 
== 0, ("freeing a kqdom with active kevqs")); - + mtx_destroy(&root->kqd_lock); + free(root, M_KQUEUE); } +/* Expensive if called *frequently* + * + * Updates parent kqdoms based on the currently active children + */ static void -kqdom_update_active(struct kqdom *leaf, int change) +kqdom_update_parents(struct kqdom *kqd, int direction) { - int oldval, newval; + int cont; + struct kqdom *child; + + KQD_OWNED(kqd); - KASSERT(change != 0, ("updating active 0")); + /* We are locking parent kqdoms while the leaf lock is acquired. + * Just a note, not a problem (so far) + */ + cont = 1; + while (cont) { + child = kqd; + kqd = kqd->parent; - while (leaf != NULL) { - oldval = atomic_fetchadd_int(&leaf->num_active, change); - newval = oldval + change; - KASSERT(oldval >= 0 && newval >= 0, ("invalid oldval or newval after update")); - if (oldval == 0) { - change = 1; - CTR3(KTR_KQ, "kqdom_update_active: change %d: num of active %d for kqdom %d", change, newval, leaf->id); - } else if (newval == 0) { - /* if new val is 0, we */ - change = -1; - CTR3(KTR_KQ, "kqdom_update_active: change %d: num of active %d for kqdom %d", change, newval, leaf->id); - } else { + if(kqd == NULL) break; + + KQD_LOCK(kqd); + + CTR3(KTR_KQ, "kqdom_update_parents: %d updating kqdom %d with %d active children", direction, kqd->id, veclist_size(&kqd->kqd_activelist)); + + if (direction == KQDIR_INACTIVE) { + veclist_remove(&kqd->kqd_activelist, child); + + /* didn't change from 1 to 0, stop */ + if (veclist_size(&kqd->kqd_activelist) != 0) { + cont = 0; + } + } else { + /* kqd->kqd_activelist is preallocated with the maximum number of children for non-leaf nodes + * Should NEVER fail + */ + + KASSERT(!veclist_need_exp(&kqd->kqd_activelist), ("kqdom requires expansion")); + veclist_insert_tail(&kqd->kqd_activelist, child); + + /* didn't change from 0 to 1, stop */ + if (veclist_size(&kqd->kqd_activelist) != 1) { + cont = 0; + } } - leaf = leaf->parent; + KQD_UNLOCK(kqd); } } @@ -2376,21 +2410,28 @@ kqdom_update_lat(struct kqdom *leaf, unsigned long avg) } } -/* DFS to mirror the cpu_group structure */ +/* Mirror the cpu_group structure */ static void kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_id) { + void **expand; struct kqdom *child; int cg_numchild = cg_cur->cg_children; - CTR4(KTR_KQ, "kqdom_build_internal: processing cpu_group with %d child groups, %d CPUs, shared cache level %d, kqd_id %d", - cg_numchild, cg_cur->cg_count, cg_cur->cg_level, *kqd_id); + CTR4(KTR_KQ, "kqdom_build_internal: processing cpu_group with %d child groups, %d CPUs, shared cache level %d, kqd_id %d", cg_numchild, cg_cur->cg_count, cg_cur->cg_level, *kqd_id); - // init fields for current + /* init fields for current */ kqd_cur->id = *kqd_id; (*kqd_id)++; - kqd_cur->num_children = cg_numchild; CPU_COPY(&cg_cur->cg_mask, &kqd_cur->cpu_mask); - kqd_cur->children = malloc(sizeof(struct kqdom *) * cg_numchild, M_KQUEUE, M_WAITOK | M_ZERO); + + /* allocate children and active lists */ + if (cg_numchild > 0) { + expand = malloc(sizeof(struct kqdom *) * cg_numchild, M_KQUEUE, M_WAITOK | M_ZERO); + veclist_expand(&kqd_cur->children, expand, cg_numchild); + + expand = malloc(sizeof(struct kqdom *) * cg_numchild, M_KQUEUE, M_WAITOK | M_ZERO); + veclist_expand(&kqd_cur->kqd_activelist, expand, cg_numchild); + } for (int i = 0; i < cg_numchild; i++) { child = malloc(sizeof(struct kqdom), M_KQUEUE, M_WAITOK | M_ZERO); @@ -2398,7 +2439,7 @@ kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_i child->parent = kqd_cur; - 
kqd_cur->children[i] = child; + veclist_insert_tail(&kqd_cur->children, child); kqdom_build_internal(child, &cg_cur->cg_child[i], kqd_id); } } @@ -2417,18 +2458,18 @@ kqdom_build() static struct kqdom * kqdom_find(struct kqdom *root, int cpuid) { - if (root->num_children == 0) { + if (kqdom_is_leaf(root)) { KASSERT(CPU_ISSET(cpuid, &root->cpu_mask), ("kqdom_find: cpuid and cpumask mismatch")); return root; } - for(int i = 0; i < root->num_children; i++) { - if(CPU_ISSET(cpuid, &root->children[i]->cpu_mask)) { - return kqdom_find(root->children[i], cpuid); + for(int i = 0; i < veclist_size(&root->children); i++) { + if(CPU_ISSET(cpuid, &((struct kqdom *)veclist_at(&root->children, i))->cpu_mask)) { + return kqdom_find((struct kqdom *)veclist_at(&root->children, i), cpuid); } } - KASSERT(0, ( "kqdom_find: cpu doesn't exist ")); + KASSERT(0, ("kqdom_find: cpu doesn't exist ")); return NULL; } @@ -2544,10 +2585,11 @@ kevq_worksteal(struct kevq *kevq) KEVQ_UNLOCK(kevq); /* todo maybe from cur kqdomain instead of from root */ - other_kevq = kqdom_random_kevq_locked(kq->kq_kqd, kevq); + other_kevq = kqdom_random_kevq_locked(kq->kq_kqd); + CTR2(KTR_KQ, "kevq_worksteal: kevq %p selected kevq %p", kevq, other_kevq); + if (other_kevq != NULL && other_kevq != kevq && other_kevq->kn_count > 0) { - CTR3(KTR_KQ, "kevq_worksteal: kevq %p selected kevq %p with %d knotes", kevq, other_kevq, other_kevq->kn_count); ws_kn = TAILQ_FIRST(&other_kevq->kn_head); while(ws_count < kq_sched_ws_count && ws_kn != NULL) { @@ -2659,14 +2701,12 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, asbt = 0; marker = knote_alloc(M_WAITOK); CTR2(KTR_KQ, "kqueue_scan: td %d allocated marker %p", td->td_tid, marker); - knote_xinit(marker); marker->kn_status = KN_MARKER; KEVQ_LOCK(kevq); if ((kevq->kevq_state & KEVQ_RDY) == 0) { /* Mark the kevq as ready to receive events */ kevq->kevq_state |= KEVQ_RDY; - kqdom_update_active(kevq->kevq_kqd, 1); } retry: @@ -2864,7 +2904,8 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, knote_flux_wakeup_ul(marker); } - if (nkev != 0) { + + if (nkev != 0 && need_track_latency(kq)) { /* book keep the statistics */ getnanouptime(&kevq->kevq_last_kev); kevq->kevq_last_nkev = nkev; @@ -3004,6 +3045,7 @@ static void kevq_destroy(struct kevq *kevq) { CTR1(KTR_KQ, "kevq_destroy for %p", kevq); + mtx_destroy(&kevq->lock); free(kevq, M_KQUEUE); } @@ -3011,7 +3053,7 @@ kevq_destroy(struct kevq *kevq) This is also called when a thread exits/crashes (currently racing, also to make it work need to reconfigure kq->ck_evq) * a ref cnt must be held */ void -kevq_drain(struct kevq *kevq) +kevq_drain(struct kevq *kevq, struct thread *td) { struct kqueue *kq; struct knote *kn; @@ -3061,17 +3103,23 @@ kevq_drain(struct kevq *kevq) KN_FLUX_OWNED(kn); KASSERT(!kn_in_flux(kn), ("knote is still influx")); - knote_enter_flux(kn); KN_FLUX_UNLOCK(kn); + /* remove knote from kevq */ knote_dequeue(kn); + + if ((kn->kn_flags & EV_AFFINITY) == EV_AFFINITY) { + knote_drop(kn, td); + } - if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI && (kq->kq_state & KQ_CLOSING) != KQ_CLOSING && (kn->kn_status & KN_MARKER) == 0) { + /* a thread cannot crash while in the kernel, and there are no extra refs + * Marker KNs should not exist + */ + KASSERT((kn->kn_status & KN_MARKER) == 0, ("Marker KN present while closing")); + + if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI && (kq->kq_state & KQ_CLOSING) != KQ_CLOSING) { KEVQ_UNLOCK(kevq); - /* TODO: When we knote activate, 
if the ev has EV_CLEAR set, maybe we shouldn't activate the event - * if there hasn't been activities on the fd - */ knote_activate(kn); KEVQ_LOCK(kevq); } @@ -3088,7 +3136,6 @@ kevq_drain(struct kevq *kevq) // // First, all knotes with kn->kn_kevq != kevq before queuing is not an issue // because if kn->kn_kevq == NULL, scheduler will grab kevq from either kqdom (QC) or kevqlist (RR) or kn->orgkevq (EV_AFFINITY) - // EV_AFFINITY is currently broken (need to keep a list of EV_AFFINITY for each kevq and delete them atomically) // KEVQs grabbed from QC or RR are locked with QC or RR locked, therefore they are either grabbed before kevq invalidation // or after kevq detachment. (In between doesn't matter since kevq is already invalidated) // In the former case, the knote would be queued to the kevq and later drained as usual. @@ -3096,13 +3143,12 @@ kevq_drain(struct kevq *kevq) // // Second, for all knotes with kn->kn_kevq == kevq. They would be already queued to kevq // and will be dequeued later (kn->kn_kevq will be set to another valid kevq) - // + // if ((kq->kq_flags & KQ_FLAG_MULTI) == KQ_FLAG_MULTI) { - // drop from KQ Domain KQ_LOCK(kq); - KQD_LOCK(kqd); KEVQ_TH_LOCK(kevq->kevq_th); + KQD_LOCK(kqd); // detach from kevq_th LIST_REMOVE(kevq, kevq_th_tqe); @@ -3110,9 +3156,6 @@ kevq_drain(struct kevq *kevq) LIST_REMOVE(kevq, kevq_th_e); // detach from kqdom - if((kevq->kevq_state & KEVQ_RDY) != 0) { - kqdom_update_active(kqd, -1); - } kqdom_remove(kqd, kevq); // detach from kqueue @@ -3121,8 +3164,8 @@ kevq_drain(struct kevq *kevq) } LIST_REMOVE(kevq, kq_e); - KEVQ_TH_UNLOCK(kevq->kevq_th); KQD_UNLOCK(kqd); + KEVQ_TH_UNLOCK(kevq->kevq_th); KQ_UNLOCK(kq); } else { KQ_LOCK(kq); @@ -3204,7 +3247,7 @@ kqueue_drain(struct kqueue *kq, struct kevq *kevq, struct thread *td) while((kevq = LIST_FIRST(&kq->kq_kevqlist)) != NULL) { KQ_UNLOCK(kq); if (kevq_acquire(kevq, 0) == 0) - kevq_drain(kevq); + kevq_drain(kevq, td); KQ_LOCK(kq); } @@ -3213,7 +3256,7 @@ kqueue_drain(struct kqueue *kq, struct kevq *kevq, struct thread *td) } else { KQ_UNLOCK(kq); // we already have a reference for single threaded mode - kevq_drain(kq->kq_kevq); + kevq_drain(kq->kq_kevq, td); KQ_LOCK(kq); } @@ -3456,7 +3499,7 @@ knote_activate(struct knote *kn) void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { - CTR1(KTR_KQ, "knlist_add kn %p", kn); + /* CTR1(KTR_KQ, "knlist_add kn %p", kn); */ KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); @@ -3849,75 +3892,45 @@ knote_drop_detached(struct knote *kn, struct thread *td) knote_free(kn); } +/* The returned kevq, if any, is locked upon return (no refcnt is held) */ static struct kevq * -kqdom_random_kevq_locked(struct kqdom* kqd, struct kevq* last_kevq) +kqdom_random_kevq_locked(struct kqdom *kqd) { - struct kqdom *each_child, *last_child; - struct kevq *kevq, *each_kevq; - int num_active, init_idx; - u_long random; + struct kevq *kevq; + struct kqdom *tkqd; + int num_active; + u_long rand; - /* fall-back with the last child in case there is a race */ - last_child = NULL; kevq = NULL; - while (kqd->num_children > 0) { - /* read once */ - num_active = kqd->num_active; - if (num_active == 0) { - /* if we got to a child and now it doesn't have any active children, then return NULL - this happens either on the first loop or due to a race of kevq deletion */ - return NULL; - } - - random = kqueue_frand() % num_active; - KASSERT(random < kqd->num_children, ("more active children than total children")); - - for(int i = 0; i < 
kqd->num_children; i++) { - each_child = kqd->children[i]; - - if (each_child->num_active > 0) { - /* if the child suits our need */ - last_child = each_child; - if (random == 0) { - kqd = each_child; - break; - } - - random--; - } - - if (i == kqd->num_children) { - kqd = last_child; - } + while (!kqdom_is_leaf(kqd)) { + rand = random(); + KQD_LOCK(kqd); + /* we only select active stuff inside this, need to be EXTREMELY fast */ + num_active = veclist_size(&kqd->kqd_activelist); + CTR1(KTR_KQ, "kqdom_random_kevq_locked: descending into kqdom %d", kqd->id); + if (num_active > 0) { + tkqd = veclist_at(&kqd->kqd_activelist, rand % num_active); + } else { + tkqd = NULL; } + KQD_UNLOCK(kqd); + kqd = tkqd; } if (kqd != NULL) { - CTR3(KTR_KQ, "kqdom_random_kevq_locked: selected kqd %d, # children %p, last_kevq %p", kqd->id, kqd->kqd_kevqcnt, last_kevq); - + CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected leaf kqdom %d", kqd->id); + rand = random(); KQD_LOCK(kqd); - if (kqd->kqd_kevqcnt != 0) { - random = kqueue_frand() % kqd->kqd_kevqcnt; - init_idx = random; - - each_kevq = kqd->kqd_kevqlist[random]; - while(1) { - /* fast fail */ - if (KEVQ_AVAIL(each_kevq) && each_kevq != last_kevq) { - KEVQ_LOCK(each_kevq); - if (KEVQ_AVAIL(each_kevq)) { - kevq = each_kevq; - break; - } - KEVQ_UNLOCK(each_kevq); - } - - random = (random + 1) % kqd->kqd_kevqcnt; - if (random == init_idx) { - break; - } - each_kevq = kqd->kqd_kevqlist[random]; + num_active = veclist_size(&kqd->kqd_kevqs); + if (num_active > 0) { + kevq = veclist_at(&kqd->kqd_kevqs, rand % num_active); + KEVQ_LOCK(kevq); + + /* make sure kevq is available */ + if (!KEVQ_AVAIL(kevq)) { + KEVQ_UNLOCK(kevq); + kevq = NULL; } } KQD_UNLOCK(kqd); @@ -3927,7 +3940,7 @@ kqdom_random_kevq_locked(struct kqdom* kqd, struct kevq* last_kevq) KEVQ_OWNED(kevq); } - CTR2(KTR_KQ, "kqdom_random_kevq_locked: selected kevq %p, last_kevq %p", kevq, last_kevq); + CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected kevq %p", kevq); return kevq; } @@ -3939,8 +3952,7 @@ knote_next_kevq(struct knote *kn) { struct kqdom *kqd; struct kqueue *kq; - struct kevq *next_kevq, *sel_kevq; - int cur_kevq; + struct kevq *next_kevq; next_kevq = NULL; kq = kn->kn_kq; @@ -3975,45 +3987,46 @@ knote_next_kevq(struct knote *kn) if ((kq->kq_sched_flags & KQ_SCHED_BEST_OF_N) != 0) { kqd = kq->kq_kqd; + for(int i = 0; i < kq_sched_bon_count; i++) { - sel_kevq = kqdom_random_kevq_locked(kqd, next_kevq); + struct kevq *sel_kevq = kqdom_random_kevq_locked(kqd); if (sel_kevq != NULL) { + int ret; + KEVQ_OWNED(sel_kevq); - CTR2(KTR_KQ, "knote_next_kevq: [BON] selected random kevq %p for kn %p", sel_kevq, kn); + /* acquire a ref; the kevq is already locked */ + ret = kevq_acquire(sel_kevq, 1); - if (next_kevq == NULL && kevq_acquire(sel_kevq, 1) == 0) { + KEVQ_UNLOCK(sel_kevq); + + if (ret != 0) { + continue; + } + + if (next_kevq == NULL) { next_kevq = sel_kevq; - KEVQ_UNLOCK(sel_kevq); } else { - // compare their avg wait time - // TODO: refactor the unlock pattern here + /* compare estimated wait time */ if (sel_kevq->kevq_avg_lat * sel_kevq->kn_count < next_kevq->kevq_avg_lat * next_kevq->kn_count) { - if (kevq_acquire(sel_kevq, 1) == 0) { - KEVQ_UNLOCK(sel_kevq); - - kevq_release(next_kevq, 0); - next_kevq = sel_kevq; - } else { - KEVQ_UNLOCK(sel_kevq); - } + /* if the newly selected kevq is better, swap them */ + kevq_release(next_kevq, 0); + next_kevq = sel_kevq; } else { - KEVQ_UNLOCK(sel_kevq); + kevq_release(sel_kevq, 0); } } - CTR2(KTR_KQ, "knote_next_kevq: [BON] current best kevq 
%p, avg wait time: %d", next_kevq, next_kevq->kevq_avg_lat * next_kevq->kn_count); + CTR3(KTR_KQ, "knote_next_kevq: [BON] current best kevq %p, avg time: %d, wait time: %d", next_kevq, next_kevq->kevq_avg_lat, next_kevq->kevq_avg_lat * next_kevq->kn_count); } } if (next_kevq != NULL) { KEVQ_LOCK(next_kevq); kevq_release(next_kevq, 1); - // recheck availability - if (!KEVQ_AVAIL(next_kevq)) { - KEVQ_UNLOCK(next_kevq); - next_kevq = NULL; - } + /* Here we don't recheck availability although it could change: + * a thread exiting is very rare; just give the knote to the chosen + * kevq and let kevq_drain requeue it if the kevq does close. + */ } CTR2(KTR_KQ, "knote_next_kevq: [BON] next kevq %p for kn %p", next_kevq, kn); @@ -4032,32 +4045,8 @@ knote_next_kevq(struct knote *kn) } kqd = kn->kn_kqd; } - KQD_LOCK(kqd); - cur_kevq = kqd->kqd_ckevq; - while(1) { - if (kqd->kqd_kevqcnt == 0) { - break; - } - cur_kevq = (cur_kevq + 1) % kqd->kqd_kevqcnt; - next_kevq = kqd->kqd_kevqlist[cur_kevq]; - - if (KEVQ_AVAIL(next_kevq)) { - /* fast fail */ - KEVQ_LOCK(next_kevq); - if (KEVQ_AVAIL(next_kevq)) { - kqd->kqd_ckevq = cur_kevq; - break; - } - KEVQ_UNLOCK(next_kevq); - } - - if (cur_kevq == kqd->kqd_ckevq) { - next_kevq = NULL; - break; - } - } - KQD_UNLOCK(kqd); + next_kevq = kqdom_random_kevq_locked(kqd); CTR2(KTR_KQ, "knote_next_kevq: [QUEUE] next kevq %p for kn %p", next_kevq, kn); } @@ -4105,12 +4094,16 @@ knote_enqueue(struct knote *kn, struct kevq *kevq) struct kqueue *kq; kq = kn->kn_kq; - CTR2(KTR_KQ, "knote_enqueue: kn %p to kevq %p", kn, kevq); + /* CTR2(KTR_KQ, "knote_enqueue: kn %p to kevq %p", kn, kevq); */ KEVQ_OWNED(kevq); KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); - KASSERT((kevq->kevq_state & KEVQ_CLOSING) == 0 && (kevq->kevq_state & KEVQ_RDY) != 0, ("kevq already closing or not ready")); + + /* Queuing to a closing kevq is fine. 
+ * The refcnt wait in kevq drain happens before knote requeuing, + * so no knote will be forgotten + * KASSERT((kevq->kevq_state & KEVQ_CLOSING) == 0 && (kevq->kevq_state & KEVQ_RDY) != 0, ("kevq already closing or not ready")); */ kn->kn_kevq = kevq; kn->kn_status |= KN_QUEUED; @@ -4121,12 +4114,6 @@ knote_enqueue(struct knote *kn, struct kevq *kevq) kevq_wakeup(kevq); } -static void -knote_xinit(struct knote *kn) -{ - mtx_init(&kn->kn_fluxlock, "kn_fluxlock", NULL, MTX_DEF | MTX_DUPOK); -} - static void knote_dequeue(struct knote *kn) { @@ -4156,14 +4143,18 @@ static struct knote * knote_alloc(int mflag) { struct knote *ret = uma_zalloc(knote_zone, mflag | M_ZERO); - CTR1(KTR_KQ, "knote_alloc: allocating knote %p", ret); + /* CTR1(KTR_KQ, "knote_alloc: allocating knote %p", ret); */ + mtx_init(&ret->kn_fluxlock, "kn_fluxlock", NULL, MTX_DEF | MTX_DUPOK); return ret; } static void knote_free(struct knote *kn) { - CTR1(KTR_KQ, "knote_free: kn %p", kn); + /* CTR1(KTR_KQ, "knote_free: kn %p", kn); */ + if (kn != NULL) { + mtx_destroy(&kn->kn_fluxlock); + } uma_zfree(knote_zone, kn); } diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h index 7eae7b761d0f..2d1d0b81c86d 100644 --- a/sys/sys/eventvar.h +++ b/sys/sys/eventvar.h @@ -36,11 +36,15 @@ #endif #include +#include +#include #define KQ_NEVENTS 8 /* minimize copy{in,out} calls */ #define KQEXTENT 256 /* linear growth by this amount */ -#define KQDOM_EXTENT_FACTOR 8 /* linear growth by this amount */ +#define KQDOM_EXTENT 8 /* linear growth by this amount */ +#define KQDIR_ACTIVE (0) +#define KQDIR_INACTIVE (1) struct kevq { LIST_ENTRY(kevq) kevq_th_e; /* entry into kevq_thred's hashtable */ @@ -61,29 +65,25 @@ struct kevq { /* Used by the scheduler */ unsigned long kevq_avg_lat; struct timespec kevq_last_kev; - int kevq_last_nkev; + uint64_t kevq_last_nkev; }; /* TODO: assumed that threads don't get rescheduled across cores */ struct kqdom { /* static */ + int id; struct mtx kqd_lock; struct kqdom *parent; - int id; cpuset_t cpu_mask; - int num_children; - struct kqdom **children; + struct veclist children; /* child kqdoms */ - /* statistics */ + /* statistics. Atomically updated, doesn't require the lock */ unsigned long avg_lat; - int num_active; /* total number of active children below this node */ /* dynamic members*/ - struct kevq **kqd_kevqlist; /* array list of kevqs on the kdomain, only set for leaf domains */ - int kqd_kevqcap; - int kqd_kevqcnt; - - int kqd_ckevq; + struct veclist kqd_activelist; /* active child kqdoms */ + struct veclist kqd_kevqs; /* kevqs for this kqdom */ + int kqd_ckevq; /* current kevq for round robin. XXX: remove round robin; it has literally no benefit but maintenance nightmares */ }; struct kqueue { diff --git a/sys/sys/veclist.h b/sys/sys/veclist.h new file mode 100644 index 000000000000..0821d8357e27 --- /dev/null +++ b/sys/sys/veclist.h @@ -0,0 +1,161 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Reliable Computer Systems Lab, University of Waterloo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* Vector list - insert/remove: O(n) + * - random access: O(1) + * - insert/remove tail: O(1) + */ + +#ifndef _SYS_VECLIST_H_ +#define _SYS_VECLIST_H_ + +#include +#include +#include + +struct veclist { + size_t cap; + size_t size; + void **buf; +}; + +static inline void +veclist_init(struct veclist *lst, void **buf, int cap) +{ + lst->size = 0; + lst->buf = buf; + lst->cap = cap; +} + +static inline void * +veclist_remove_at(struct veclist *lst, size_t idx) +{ + void *ret; + KASSERT(lst->size > idx, ("veclist_remove_at index out of bound")); + ret = lst->buf[idx]; + memmove(&lst->buf[idx], &lst->buf[idx+1], (lst->size - (idx + 1)) * sizeof(void*)); + lst->size--; + return ret; +} + +static inline void * +veclist_remove(struct veclist *lst, void *ele) +{ + int found; + + for(found = 0; found < lst->size; found++) { + if(lst->buf[found] == ele) { + break; + } + } + + return veclist_remove_at(lst, found); +} + +/* inserts an element so that the index of the element after insertion is idx */ +static inline void +veclist_insert_at(struct veclist *lst, void *ele, size_t idx) +{ + KASSERT((lst->cap > lst->size) && (lst->size >= idx), ("veclist overflow")); + memmove(&lst->buf[idx+1], &lst->buf[idx], (lst->size - idx) * sizeof(void*)); + lst->size++; + lst->buf[idx] = ele; +} + +static inline void +veclist_insert_tail(struct veclist *lst, void *ele) +{ + veclist_insert_at(lst, ele, lst->size); +} + +static inline void +veclist_insert_head(struct veclist *lst, void *ele) +{ + veclist_insert_at(lst, ele, 0); +} + +static inline void * +veclist_remove_head(struct veclist *lst) +{ + return veclist_remove_at(lst, 0); +} + +static inline void * +veclist_remove_tail(struct veclist *lst) +{ + return veclist_remove_at(lst, lst->size - 1); +} + +/* returns old buffer */ +static inline void** +veclist_expand(struct veclist *lst, void **new_buf, size_t new_cap) +{ + void **ret; + KASSERT(new_cap > lst->cap, ("veclist expand")); + memcpy(new_buf, lst->buf, lst->size * sizeof(void*)); + ret = lst->buf; + lst->buf = new_buf; + lst->cap = new_cap; + return ret; +} + +static inline int +veclist_need_exp(struct veclist *lst) +{ + return (lst->size == lst->cap); +} + +static inline int +veclist_cap(struct veclist *lst) +{ + return lst->cap; +} + +static inline int +veclist_size(struct veclist *lst) +{ + return lst->size; +} + +static inline void * +veclist_buf(struct veclist *lst) +{ + return lst->buf; +} + +static inline void * +veclist_at(struct veclist *lst, size_t idx) +{ + KASSERT(lst->size > 
idx, ("veclist_at index out of bound")); + return lst->buf[idx]; +} + + +#endif diff --git a/tests/sys/kqueue/libkqueue/common.h b/tests/sys/kqueue/libkqueue/common.h index 814d6cc0fc93..a20e9b187669 100644 --- a/tests/sys/kqueue/libkqueue/common.h +++ b/tests/sys/kqueue/libkqueue/common.h @@ -46,6 +46,7 @@ int vnode_fd; extern char * kevent_to_str(struct kevent *); struct kevent * kevent_get(int); struct kevent * kevent_get_timeout(int, int); +struct kevent * kevent_get_timeout_u(int kqfd, int useconds); void kevent_cmp(struct kevent *, struct kevent *); diff --git a/tests/sys/kqueue/libkqueue/main.c b/tests/sys/kqueue/libkqueue/main.c index 64a6cb465b25..657730eb368e 100644 --- a/tests/sys/kqueue/libkqueue/main.c +++ b/tests/sys/kqueue/libkqueue/main.c @@ -117,6 +117,28 @@ kevent_get_timeout(int kqfd, int seconds) return (kev); } +/* Retrieve a single kevent, specifying a maximum time to wait for it. */ +struct kevent * +kevent_get_timeout_u(int kqfd, int useconds) +{ + int nfds; + struct kevent *kev; + struct timespec timeout = {0, useconds * 1000}; + + if ((kev = calloc(1, sizeof(*kev))) == NULL) + err(1, "out of memory"); + + nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout); + if (nfds < 0) { + err(1, "kevent(2)"); + } else if (nfds == 0) { + free(kev); + kev = NULL; + } + + return (kev); +} + char * kevent_fflags_dump(struct kevent *kev) { diff --git a/tests/sys/kqueue/libkqueue/read_m.c b/tests/sys/kqueue/libkqueue/read_m.c index 932d6b15c1b4..20db2c67a72a 100644 --- a/tests/sys/kqueue/libkqueue/read_m.c +++ b/tests/sys/kqueue/libkqueue/read_m.c @@ -35,14 +35,15 @@ struct thread_info { int group_id; int evcnt; int tid; + int delay; }; /* * Read test */ -#define THREAD_CNT (16) -#define PACKET_CNT (1600) +#define THREAD_CNT (32) +#define PACKET_CNT (3200) int g_kqfd; int g_sockfd[2]; struct thread_info g_thrd_info[THREAD_CNT]; sem_t g_sem_driver; static void -check_sched(struct thread_info *info, int size) +check_sched(struct thread_info *info, int size, unsigned int max_diff) { - int max = 0, min = 999999; + int max = 0, min = INT_MAX; for(int i = 0; i < size; i++) { int cur = info[i].evcnt; if (cur > max) { max = cur; } if (cur < min) { min = cur; } } - if ((max - min) > 1) { -#ifdef TEST_DEBUG - printf("READ_M: check_sched: max difference is %d\n", max - min); -#endif - abort(); + if ((max - min) > max_diff) { + errx(1, "READ_M: check_sched: max difference is %d", max - min); } } @@ -124,6 +122,9 @@ test_socket_read_thrd(void* args) dat = socket_pop(ret->ident); free(ret); + if(info->delay) + usleep(info->tid * 10); + if (dat == 'e') break; @@ -142,10 +143,10 @@ } static void -test_socket_read(void) +test_socket_read(int delay) { int error = 0; - const char *test_id = delay ? 
"[Multi][BON]kevent" : "[Multi]kevent(EVFILT_READ)"; test_begin(test_id); if (socketpair(AF_UNIX, SOCK_STREAM, 0, &g_sockfd[0]) < 0) @@ -171,6 +172,7 @@ test_socket_read(void) for (int i = 0; i < THREAD_CNT; i++) { g_thrd_info[i].tid = i; g_thrd_info[i].evcnt = 0; + g_thrd_info[i].delay = delay; pthread_create(&g_thrd_info[i].thrd, NULL, test_socket_read_thrd, &g_thrd_info[i]); } @@ -188,7 +190,8 @@ test_socket_read(void) /* wait for thread events */ sem_wait(&g_sem_driver); - check_sched(g_thrd_info, THREAD_CNT); + if (!delay) + check_sched(g_thrd_info, THREAD_CNT, 1); } @@ -426,41 +429,25 @@ test_socket_queue(void) /*************************** * WS test ***************************/ -#define SOCK_WS_CNT (1000) +#define SOCK_WS_CNT (100) +#define WS_TIMEOUT (10) -volatile int ws_good = 0; +static volatile int ws_num = 0; static void* test_socket_ws_worker(void* args) { struct thread_info *info = (struct thread_info *) args; char dat; - int ws_num = 0; struct kevent *ret; - while (1) { -#ifdef TEST_DEBUG - printf("READ_M: thread %d waiting for events\n", info->tid); -#endif - ret = kevent_get(g_kqfd); -#ifdef TEST_DEBUG - printf("READ_M: thread %d woke up\n", info->tid); -#endif - - dat = socket_pop(ret->ident); - free(ret); - + while (ws_num < SOCK_WS_CNT) { if (info->ws_master == 0) { - /*if we are the master, wait for slave to signal us*/ - while(!ws_good) { - usleep(500); - } - break; - } else { - ws_num++; - if (ws_num == SOCK_WS_CNT - 1) { - ws_good = 1; - break; + ret = kevent_get_timeout_u(g_kqfd, WS_TIMEOUT); + if (ret != NULL) { + dat = socket_pop(ret->ident); + free(ret); + ws_num++; } } } @@ -731,7 +718,7 @@ test_evfilt_read_m() err(1, "ioctl"); } - test_socket_read(); + test_socket_read(0); test_socket_brutal(); close(g_kqfd); @@ -744,18 +731,7 @@ test_evfilt_read_m() err(1, "ioctl"); } - test_socket_queue(); - test_socket_brutal(); - - close(g_kqfd); - - flags = KQ_SCHED_BEST_OF_N; - g_kqfd = kqueue(); - error = ioctl(g_kqfd, FKQMULTI, &flags); - if (error == -1) { - err(1, "ioctl"); - } - + //test_socket_queue(); test_socket_brutal(); close(g_kqfd); @@ -769,6 +745,18 @@ test_evfilt_read_m() test_socket_ws(); test_socket_brutal(); + close(g_kqfd); + + flags = KQ_SCHED_BEST_OF_N; + g_kqfd = kqueue(); + error = ioctl(g_kqfd, FKQMULTI, &flags); + if (error == -1) { + err(1, "ioctl"); + } + + test_socket_brutal(); + test_socket_read(1); + close(g_kqfd); } \ No newline at end of file