Fix best of 2. The wakeup bug in knote_flux. Fix work stolen knotes not being processed

This commit is contained in:
Charlie Root 2019-12-09 18:17:02 -05:00
parent 28ef953b8c
commit 2043d894bb
6 changed files with 465 additions and 166 deletions

View File

@ -92,6 +92,10 @@ static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
static uint32_t cache_pen = 1000; static uint32_t cache_pen = 1000;
SYSCTL_U32(_kern, OID_AUTO, kq_cache_pen, CTLFLAG_RW, &cache_pen, 0, "KQueue cache miss's penalty in cycles."); SYSCTL_U32(_kern, OID_AUTO, kq_cache_pen, CTLFLAG_RW, &cache_pen, 0, "KQueue cache miss's penalty in cycles.");
/* sysctl for best of 2 latency penalty */
static uint32_t log_threshold = 400;
SYSCTL_U32(_kern, OID_AUTO, log_threshold, CTLFLAG_RW, &log_threshold, 0, "KQueue knote log threshold.");
/* sysctl for ws_int_sbt */ /* sysctl for ws_int_sbt */
static sbintime_t ws_int_sbt = 0; static sbintime_t ws_int_sbt = 0;
SYSCTL_U64(_kern, OID_AUTO, kq_ws_int_sbt, CTLFLAG_RD, &ws_int_sbt, 0, "KQueue work stealing interval in sbintime."); SYSCTL_U64(_kern, OID_AUTO, kq_ws_int_sbt, CTLFLAG_RD, &ws_int_sbt, 0, "KQueue work stealing interval in sbintime.");
@ -170,12 +174,12 @@ static int kevq_acquire(struct kevq *kevq, int locked);
static void kevq_worksteal(struct kevq *kevq); static void kevq_worksteal(struct kevq *kevq);
static void kevq_drain(struct kevq *kevq, struct thread *td); static void kevq_drain(struct kevq *kevq, struct thread *td);
static void kevq_activate(struct kevq *kevq, struct thread *td); static void kevq_activate(struct kevq *kevq, struct thread *td);
static struct kevq * kevq_vec_select_kevq(struct veclist *lst, int num_rand, u_long rand); static struct kevq * kvlst_sel_dist_kevq(struct veclist *lst, u_long rand, struct kevq *kevq_to_skip);
static struct kevq * kvlst_sel_kevq(struct veclist *lst, int num_rand, u_long rand, long (*kevq_cmp_f)(struct kevq*, struct kevq*), struct kevq *kevq_to_skip);
static struct knote * kevq_peek_knote(struct kevq *kevq); static struct knote * kevq_peek_knote(struct kevq *kevq);
static inline void kevq_delete_knote(struct kevq *kevq, struct knote *kn); static inline void kevq_delete_knote(struct kevq *kevq, struct knote *kn);
static void kevq_insert_knote(struct kevq *kevq, struct knote *kn); static void kevq_insert_knote(struct kevq *kevq, struct knote *kn);
static int kevq_total_knote(struct kevq *kevq); static int kevq_total_knote(struct kevq *kevq);
static struct knote * kevq_dequeue(struct kevq *kevq, int rt);
static void kevq_insert_head_knote(struct kevq *kevq, struct knote *kn); static void kevq_insert_head_knote(struct kevq *kevq, struct knote *kn);
static void knote_enqueue_head(struct knote *kn, struct kevq *kevq); static void knote_enqueue_head(struct knote *kn, struct kevq *kevq);
@ -195,7 +199,13 @@ static int kqueue_scan(struct kevq *kq, int maxevents,
struct kevent_copyops *k_ops, struct kevent_copyops *k_ops,
const struct timespec *timeout, const struct timespec *timeout,
struct kevent *keva, struct thread *td); struct kevent *keva, struct thread *td);
static void kqueue_dump(struct kqueue *kq, struct sbuf *buf);
/* XXX: */
#ifdef ENABLE_SELECT
static void kqueue_wakeup(struct kqueue *kq); static void kqueue_wakeup(struct kqueue *kq);
#endif
static struct filterops *kqueue_fo_find(int filt); static struct filterops *kqueue_fo_find(int filt);
static void kqueue_fo_release(int filt); static void kqueue_fo_release(int filt);
struct g_kevent_args; struct g_kevent_args;
@ -245,7 +255,7 @@ static struct kevq * knote_next_kevq(struct knote *kn);
static void kqdom_init(struct kqdom *kqd); static void kqdom_init(struct kqdom *kqd);
static void kqdom_update_lat(struct kqdom *leaf, unsigned long avg); //static void kqdom_update_lat(struct kqdom *leaf, unsigned long avg);
static void kqdom_update_parents(struct kqdom *leaf, int direction); static void kqdom_update_parents(struct kqdom *leaf, int direction);
static void kqdom_insert(struct kqdom *kqd, struct kevq *kevq); static void kqdom_insert(struct kqdom *kqd, struct kevq *kevq);
static void kqdom_remove(struct kqdom *kqd, struct kevq *kevq); static void kqdom_remove(struct kqdom *kqd, struct kevq *kevq);
@ -335,13 +345,6 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
#define KN_FLUX_UNLOCK(kn) do { \ #define KN_FLUX_UNLOCK(kn) do { \
mtx_unlock(&(kn)->kn_fluxlock); \ mtx_unlock(&(kn)->kn_fluxlock); \
} while (0) } while (0)
#define KN_LEAVE_FLUX_WAKEUP(kn) do { \
KN_FLUX_NOTOWNED((kn)); \
KN_FLUX_LOCK((kn)); \
knote_leave_flux((kn)); \
knote_flux_wakeup((kn)); \
KN_FLUX_UNLOCK((kn)); \
} while(0)
#define KEVQ_TH_UNLOCK(kevqth) do { \ #define KEVQ_TH_UNLOCK(kevqth) do { \
mtx_unlock(&(kevqth)->lock); \ mtx_unlock(&(kevqth)->lock); \
} while (0) } while (0)
@ -366,7 +369,6 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
#define KEVQ_NOTOWNED(kevq) do { \ #define KEVQ_NOTOWNED(kevq) do { \
mtx_assert(&(kevq)->lock, MA_NOTOWNED); \ mtx_assert(&(kevq)->lock, MA_NOTOWNED); \
} while (0) } while (0)
#define KQD_ROWNED(kqd) do { \ #define KQD_ROWNED(kqd) do { \
rw_assert(&(kqd)->kqd_lock, RA_RLOCKED); \ rw_assert(&(kqd)->kqd_lock, RA_RLOCKED); \
} while (0) } while (0)
@ -418,8 +420,7 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
#define KQTUNE_PARSE_ARGS(sf) (((sf) >> 16) & 0xFFFF) #define KQTUNE_PARSE_ARGS(sf) (((sf) >> 16) & 0xFFFF)
#define KQTUNE_PARSE_OBJ(sf) ((sf) & 0xFFFF) #define KQTUNE_PARSE_OBJ(sf) ((sf) & 0xFFFF)
#define NSHUFF 50 #define NSHUFF (50)
/* /*
* Pseudo-random number generator for perturbing the profiling clock, * Pseudo-random number generator for perturbing the profiling clock,
@ -463,7 +464,21 @@ kqueue_srandom(u_long *field, u_long seed)
static inline long static inline long
kevq_exp_lat(struct kevq *kevq) kevq_exp_lat(struct kevq *kevq)
{ {
return kevq->kevq_avg_lat * (kevq_total_knote(kevq) + 1) + kevq->kevq_last_kev; int64_t expected_kev;
int64_t last_kev;
int64_t now;
now = get_cyclecount();
expected_kev = kevq->kevq_last_nkev * kevq->kevq_avg_lat;
/* XXX: if a thread is interrupted by a signal then this screws up. Maybe have a separate value for signal */
last_kev = kevq->kevq_last_kev == KEVQ_LAST_KERN ? now : kevq->kevq_last_kev;
if (last_kev < now - expected_kev) {
last_kev = now - expected_kev;
// CTR2(KTR_KQ, "kevq_exp_lat: %p too behind. setting new last_kevq: %ld", kevq, last_kev);
}
return kevq->kevq_avg_lat * (kevq_total_knote(kevq) + 1) + last_kev;
} }
static inline long static inline long
@ -471,17 +486,17 @@ kevq_lat_cmp(struct kevq *kevq1, struct kevq *kevq2)
{ {
if (kevq1 == kevq2) if (kevq1 == kevq2)
return 0; return 0;
// CTR4(KTR_KQ, "kevq_lat_cmp: comparing %p: %ld with %p: %ld", kevq1, kevq_exp_lat(kevq1), kevq2, kevq_exp_lat(kevq2));
return kevq_exp_lat(kevq1) - kevq_exp_lat(kevq2); return kevq_exp_lat(kevq2) - kevq_exp_lat(kevq1);
} }
static inline long static inline long
kevq_lat_wcmp(struct kevq *kevq1, struct kevq *kevq2, int pct1) kevq_lat_wcmp(struct kevq *kevq1, struct kevq *kevq2)
{ {
if (kevq1 == kevq2) if (kevq1 == kevq2)
return 0; return 0;
// CTR4(KTR_KQ, "kevq_lat_wcmp: comparing %p: %ld with %p: %ld", kevq1, kevq_exp_lat(kevq1), kevq2, kevq_exp_lat(kevq2));
return kevq_exp_lat(kevq1) - (cache_pen + kevq_exp_lat(kevq2)); return (cache_pen + kevq_exp_lat(kevq2)) - kevq_exp_lat(kevq1);
} }
static inline int static inline int
@ -550,7 +565,7 @@ knote_enter_flux_ul(struct knote *kn)
static void static void
knote_enter_flux(struct knote *kn) knote_enter_flux(struct knote *kn)
{ {
/* CTR2(KTR_KQ, "knote_enter_flux: %p flux: %d", kn, kn->kn_influx); */ CTR2(KTR_KQ, "knote_enter_flux: %p flux: %d", kn, kn->kn_influx);
KN_FLUX_OWNED(kn); KN_FLUX_OWNED(kn);
MPASS(kn->kn_influx < INT_MAX); MPASS(kn->kn_influx < INT_MAX);
kn->kn_influx++; kn->kn_influx++;
@ -570,11 +585,13 @@ knote_leave_flux_ul(struct knote *kn)
static bool static bool
knote_leave_flux(struct knote *kn) knote_leave_flux(struct knote *kn)
{ {
/* CTR2(KTR_KQ, "knote_leave_flux: %p flux: %d", kn, kn->kn_influx); */ CTR2(KTR_KQ, "knote_leave_flux: %p flux: %d", kn, kn->kn_influx);
KN_FLUX_OWNED(kn); KN_FLUX_OWNED(kn);
MPASS(kn->kn_influx > 0); MPASS(kn->kn_influx > 0);
kn->kn_influx--; kn->kn_influx--;
knote_flux_wakeup(kn);
return (kn->kn_influx == 0); return (kn->kn_influx == 0);
} }
@ -791,7 +808,7 @@ filt_proc(struct knote *kn, long hint)
struct proc *p; struct proc *p;
u_int event; u_int event;
CTR1(KTR_KQ, "KQUEUE: filt_proc called for kn %p", kn); CTR2(KTR_KQ, "filt_proc called for kn %p, hint %ld", kn, hint);
p = kn->kn_ptr.p_proc; p = kn->kn_ptr.p_proc;
if (p == NULL) /* already activated, from attach filter */ if (p == NULL) /* already activated, from attach filter */
@ -801,8 +818,10 @@ filt_proc(struct knote *kn, long hint)
event = (u_int)hint & NOTE_PCTRLMASK; event = (u_int)hint & NOTE_PCTRLMASK;
/* If the user is interested in this event, record it. */ /* If the user is interested in this event, record it. */
if (kn->kn_sfflags & event) if (kn->kn_sfflags & event) {
kn->kn_fflags |= event; kn->kn_fflags |= event;
CTR2(KTR_KQ, "filt_proc: set fflags or kn %p: %d", kn, kn->kn_fflags);
}
/* Process is gone, so flag the event as finished. */ /* Process is gone, so flag the event as finished. */
if (event == NOTE_EXIT) { if (event == NOTE_EXIT) {
@ -836,6 +855,8 @@ knote_fork(struct knlist *list, struct thread *td, int pid)
int error; int error;
int event; int event;
CTR2(KTR_KQ, "knote_fork for pid %d, tid", td->td_proc->p_pid, td->td_tid);
MPASS(list != NULL); MPASS(list != NULL);
KNL_ASSERT_LOCKED(list); KNL_ASSERT_LOCKED(list);
if (SLIST_EMPTY(&list->kl_list)) if (SLIST_EMPTY(&list->kl_list))
@ -843,6 +864,8 @@ knote_fork(struct knlist *list, struct thread *td, int pid)
memset(&kev, 0, sizeof(kev)); memset(&kev, 0, sizeof(kev));
SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
CTR2(KTR_KQ, "knote_fork processing knote %p for pid %d", kn, td->td_proc->p_pid);
kq = kn->kn_kq; kq = kn->kn_kq;
kevq = kn->kn_org_kevq; kevq = kn->kn_org_kevq;
@ -861,13 +884,14 @@ knote_fork(struct knlist *list, struct thread *td, int pid)
* The same as knote(), activate the event. * The same as knote(), activate the event.
*/ */
if ((kn->kn_sfflags & NOTE_TRACK) == 0) { if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
CTR2(KTR_KQ, "knote_fork activating non-track knote %p for pid %d", kn, td->td_proc->p_pid);
event = kn->kn_fop->f_event(kn, NOTE_FORK); event = kn->kn_fop->f_event(kn, NOTE_FORK);
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
if (event) if (event)
knote_activate(kn); knote_activate(kn);
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
continue; continue;
} }
@ -896,7 +920,7 @@ knote_fork(struct knlist *list, struct thread *td, int pid)
kev.fflags = kn->kn_sfflags; kev.fflags = kn->kn_sfflags;
kev.data = kn->kn_id; /* parent */ kev.data = kn->kn_id; /* parent */
kev.udata = kn->kn_kevent.udata;/* preserve udata */ kev.udata = kn->kn_kevent.udata;/* preserve udata */
error = kqueue_register(kq, kevq, &kev, NULL, M_NOWAIT); error = kqueue_register(kq, kevq, &kev, td, M_NOWAIT);
if (error) if (error)
kn->kn_fflags |= NOTE_TRACKERR; kn->kn_fflags |= NOTE_TRACKERR;
@ -910,14 +934,17 @@ knote_fork(struct knlist *list, struct thread *td, int pid)
kev.fflags = kn->kn_sfflags; kev.fflags = kn->kn_sfflags;
kev.data = kn->kn_id; /* parent */ kev.data = kn->kn_id; /* parent */
kev.udata = kn->kn_kevent.udata;/* preserve udata */ kev.udata = kn->kn_kevent.udata;/* preserve udata */
error = kqueue_register(kq, kevq, &kev, NULL, M_NOWAIT); error = kqueue_register(kq, kevq, &kev, td, M_NOWAIT);
if (error) if (error)
kn->kn_fflags |= NOTE_TRACKERR; kn->kn_fflags |= NOTE_TRACKERR;
if (kn->kn_fop->f_event(kn, NOTE_FORK))
if (kn->kn_fop->f_event(kn, NOTE_FORK)) {
CTR2(KTR_KQ, "knote_fork activating track knote %p for pid %d", kn, td->td_proc->p_pid);
knote_activate(kn); knote_activate(kn);
}
list->kl_lock(list->kl_lockarg); list->kl_lock(list->kl_lockarg);
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
} }
} }
@ -1002,7 +1029,7 @@ filt_timerexpire(void *knx)
knote_enter_flux_ul(kn); knote_enter_flux_ul(kn);
knote_activate(kn); knote_activate(kn);
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
if ((kn->kn_flags & EV_ONESHOT) != 0) if ((kn->kn_flags & EV_ONESHOT) != 0)
return; return;
@ -1570,8 +1597,8 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
if ((KQSCHED_GET_SCHED(kq) & KEVQ_LAT_FLAGS) && (kevq->kevq_state & KEVQ_ACTIVE)) if ((KQSCHED_GET_SCHED(kq) & KEVQ_LAT_FLAGS) && (kevq->kevq_state & KEVQ_ACTIVE))
{ {
/* prob don't need the lock here as these are only accessible by one thread */ /* kevq_last_nkev might be 0 if the thread is waken up by a signal */
if (kevq->kevq_last_nkev != 0) if (kevq->kevq_last_nkev != KEVQ_LAST_KERN && kevq->kevq_last_nkev != 0)
{ {
/* make sure we actually processed events last time */ /* make sure we actually processed events last time */
cur_ts = get_cyclecount(); cur_ts = get_cyclecount();
@ -1589,13 +1616,16 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
avg = cur_ts / kevq->kevq_last_nkev; avg = cur_ts / kevq->kevq_last_nkev;
CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (avg) %ld ns", td->td_tid, kevq->kevq_last_nkev, avg); CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (avg) %ld ns", td->td_tid, kevq->kevq_last_nkev, avg);
if (kevq->kevq_avg_lat != 0) { if (kevq->kevq_avg_lat != 0) {
kevq->kevq_avg_lat = calc_overtime_avg(kevq->kevq_avg_lat, avg, 80); kevq->kevq_avg_lat = calc_overtime_avg(kevq->kevq_avg_lat, avg, 95);
} else { } else {
kevq->kevq_avg_lat = avg; kevq->kevq_avg_lat = avg;
} }
CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (new avg) %ld ns", td->td_tid, kevq->kevq_last_nkev, kevq->kevq_avg_lat); CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (new avg) %ld ns", td->td_tid, kevq->kevq_last_nkev, kevq->kevq_avg_lat);
/* reset kevq->kevq_last_kev and nkev */
kevq->kevq_last_kev = KEVQ_LAST_KERN;
kevq->kevq_last_nkev = KEVQ_LAST_KERN;
//kqdom_update_lat(kevq->kevq_kqd, avg); //kqdom_update_lat(kevq->kevq_kqd, avg);
} }
} }
@ -1770,7 +1800,7 @@ kqueue_register(struct kqueue *kq, struct kevq *kevq, struct kevent *kev, struct
int error, filt; int error, filt;
int haskqglobal, filedesc_unlock; int haskqglobal, filedesc_unlock;
CTR5(KTR_KQ, "kqueue_register: kq %p, kevq %p, ident: %d, filter: %d, flags: 0x%X", kq, kevq, (int)kev->ident, kev->filter, kev->flags); CTR6(KTR_KQ, "kqueue_register: td %d kq %p, kevq %p, ident: %d, filter: %d, flags: 0x%X", td->td_tid, kq, kevq, (int)kev->ident, kev->filter, kev->flags);
if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE))
return (EINVAL); return (EINVAL);
@ -2026,7 +2056,7 @@ kqueue_register(struct kqueue *kq, struct kevq *kevq, struct kevent *kev, struct
knote_activate(kn); knote_activate(kn);
kn->kn_status &= ~KN_SCAN; kn->kn_status &= ~KN_SCAN;
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
kn_list_unlock(knl); kn_list_unlock(knl);
@ -2091,6 +2121,9 @@ kevq_init(struct kevq *kevq) {
kevq->kn_marker->kn_kevq = kevq; kevq->kn_marker->kn_kevq = kevq;
kevq->kn_marker_rt->kn_kevq = kevq; kevq->kn_marker_rt->kn_kevq = kevq;
kevq->kevq_last_nkev = KEVQ_LAST_KERN;
kevq->kevq_last_kev = KEVQ_LAST_KERN;
kqueue_srandom(&kevq->kevq_rand_seed, (u_long)kevq); kqueue_srandom(&kevq->kevq_rand_seed, (u_long)kevq);
} }
@ -2469,7 +2502,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
"total_mismatches=\"%ld\" " "total_mismatches=\"%ld\" "
"total_worksteal=\"%ld\" " "total_worksteal=\"%ld\" "
"total_realtime=\"%ld\" " "total_realtime=\"%ld\" "
"total_sched=\"%ld\" />\n", "total_sched=\"%ld\" "
"last_kev=\"%ld\" "
"last_nkev=\"%d\" />\n",
level * DUMP_INDENT, ' ', kevq, kevq->kn_count, kevq->kn_rt_count, level * DUMP_INDENT, ' ', kevq, kevq->kn_count, kevq->kn_rt_count,
kevq->kevq_avg_rlimit, kevq->kevq_avg_rlimit,
kevq->kevq_tot_time, kevq->kevq_tot_time,
@ -2481,7 +2516,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
kevq->kevq_tot_kqd_mismatch, kevq->kevq_tot_kqd_mismatch,
kevq->kevq_tot_ws, kevq->kevq_tot_ws,
kevq->kevq_tot_realtime, kevq->kevq_tot_realtime,
kevq->kevq_tot_sched); kevq->kevq_tot_sched,
kevq->kevq_last_kev,
kevq->kevq_last_nkev);
} }
static void static void
@ -2568,25 +2605,25 @@ kqdom_update_parents(struct kqdom *kqd, int direction)
} }
} }
static void // static void
kqdom_update_lat(struct kqdom *leaf, uint64_t avg) // kqdom_update_lat(struct kqdom *leaf, uint64_t avg)
{ // {
/* We don't need this function for now */ // /* We don't need this function for now */
KASSERT(0, ("kqdom_update_lat called")); // KASSERT(0, ("kqdom_update_lat called"));
while(leaf != NULL) { // while(leaf != NULL) {
if (leaf->avg_lat != 0) { // if (leaf->avg_lat != 0) {
// bit rot race here? // // bit rot race here?
leaf->avg_lat = calc_overtime_avg(leaf->avg_lat, avg, 80); // leaf->avg_lat = calc_overtime_avg(leaf->avg_lat, avg, 80);
} else { // } else {
leaf->avg_lat = avg; // leaf->avg_lat = avg;
} // }
CTR2(KTR_KQ, "kqdom_update_lat: updated avg lat %ld us for kqdom %d", leaf->avg_lat, leaf->id); // CTR2(KTR_KQ, "kqdom_update_lat: updated avg lat %ld us for kqdom %d", leaf->avg_lat, leaf->id);
leaf = leaf->parent; // leaf = leaf->parent;
} // }
} // }
/* Mirror the cpu_group structure */ /* Mirror the cpu_group structure */
@ -2794,9 +2831,9 @@ kevq_worksteal(struct kevq *kevq)
KASSERT(tgt_count <= 8, ("too many kevq ws knotes")); KASSERT(tgt_count <= 8, ("too many kevq ws knotes"));
KVLST_RLOCK(kq); KVLST_RLOCK(kq);
other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, 1, kqueue_random(&kevq->kevq_rand_seed)); other_kevq = kvlst_sel_kevq(&kq->kevq_vlist, 1, kqueue_random(&kevq->kevq_rand_seed), NULL, kevq);
/* fast fail */ /* fast fail */
if (other_kevq != kevq && kevq_stealable(other_kevq)) { if (other_kevq != NULL && other_kevq != kevq && kevq_stealable(other_kevq)) {
if (KEVQ_TRYLOCK(other_kevq)) { if (KEVQ_TRYLOCK(other_kevq)) {
if (!kevq_stealable(other_kevq)) { if (!kevq_stealable(other_kevq)) {
KEVQ_UNLOCK(other_kevq); KEVQ_UNLOCK(other_kevq);
@ -2870,8 +2907,8 @@ kevq_worksteal(struct kevq *kevq)
kevq->kevq_tot_ws += ws_count; kevq->kevq_tot_ws += ws_count;
for (int i = 0; i < ws_count; i++) { for (int i = 0; i < ws_count; i++) {
knote_enqueue_head(ws_lst[i], kevq); knote_enqueue_head(ws_lst[i], kevq);
KN_LEAVE_FLUX_WAKEUP(ws_lst[i]); knote_leave_flux_ul(ws_lst[i]);
//CTR4(KTR_KQ, "kevq_worksteal: kevq %p stole kn %p, ident: %d from kevq %p", kevq, ws_lst[i], ws_lst[i]->kn_id, other_kevq); CTR4(KTR_KQ, "kevq_worksteal: kevq %p stole kn %p, ident: %d from kevq %p", kevq, ws_lst[i], ws_lst[i]->kn_id, other_kevq);
} }
} }
@ -2982,7 +3019,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
if (kevq->kevq_avg_rlimit == 0) { if (kevq->kevq_avg_rlimit == 0) {
kevq->kevq_avg_rlimit = rtlimit; kevq->kevq_avg_rlimit = rtlimit;
} else { } else {
kevq->kevq_avg_rlimit = calc_overtime_avg(kevq->kevq_avg_rlimit, rtlimit, 80); kevq->kevq_avg_rlimit = calc_overtime_avg(kevq->kevq_avg_rlimit, rtlimit, 95);
} }
rsbt = 0; rsbt = 0;
if (tsp != NULL) { if (tsp != NULL) {
@ -3026,6 +3063,27 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
rtmarker->kn_status = KN_MARKER; rtmarker->kn_status = KN_MARKER;
rtmarker->kn_flags = EV_REALTIME; rtmarker->kn_flags = EV_REALTIME;
} }
/* for detecting imbalances only */
if (kevq_total_knote(kevq) >= log_threshold) {
struct sbuf buf;
char * rbuf;
rbuf = malloc(1024 * 1024 * sizeof(char), M_KQUEUE, M_NOWAIT);
if (rbuf != NULL) {
sbuf_new(&buf, rbuf, 1024 * 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
kqueue_dump(kq, &buf);
sbuf_finish(&buf);
uprintf("%s\n", sbuf_data(&buf));
sbuf_delete(&buf);
free(rbuf, M_KQUEUE);
}
}
KEVQ_LOCK(kevq); KEVQ_LOCK(kevq);
@ -3125,7 +3183,8 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
influx = 0; influx = 0;
while (count < maxevents) { /* XXX : performance: we should only traverse all knotes in SKQ mode. */
while (1) {
KEVQ_OWNED(kevq); KEVQ_OWNED(kevq);
/* fullfill the limit first */ /* fullfill the limit first */
@ -3145,10 +3204,11 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
pri = 0; pri = 0;
kn = TAILQ_FIRST(kntq); kn = TAILQ_FIRST(kntq);
} }
KASSERT(kn != NULL, ("kqueue_scan dequeued NULL")); KASSERT(kn != NULL, ("kqueue_scan dequeued NULL"));
KN_FLUX_LOCK(kn); KN_FLUX_LOCK(kn);
if ((kn->kn_status == KN_MARKER && kn != marker && kn != rtmarker) || if ((kn->kn_status == KN_MARKER && kn != marker && kn != rtmarker) ||
kn_in_flux(kn)) { kn_in_flux(kn)) {
if (influx) { if (influx) {
@ -3167,17 +3227,11 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
continue; continue;
} }
/* Now we have exclusive access to kn */ CTR5(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p, fflags: %d, curr %d", td->td_tid, kevq, kn, kn->kn_fflags, curr);
TAILQ_REMOVE(kntq, kn, kn_tqe); TAILQ_REMOVE(kntq, kn, kn_tqe);
CTR4(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p, curr %d", td->td_tid, kevq, kn, curr); /* check marker first (exit condition) */
if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
kn->kn_status &= ~(KN_QUEUED | KN_PROCESSING | KN_WS);
*kncnt -= 1;
KN_FLUX_UNLOCK(kn);
continue;
}
if (kn == marker || kn == rtmarker) { if (kn == marker || kn == rtmarker) {
/* We are dequeuing our marker, wakeup threads waiting on it */ /* We are dequeuing our marker, wakeup threads waiting on it */
knote_flux_wakeup(kn); knote_flux_wakeup(kn);
@ -3196,6 +3250,13 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
goto done; goto done;
} }
if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
kn->kn_status &= ~(KN_QUEUED | KN_PROCESSING | KN_WS);
*kncnt -= 1;
KN_FLUX_UNLOCK(kn);
continue;
}
KASSERT(!kn_in_flux(kn), KASSERT(!kn_in_flux(kn),
("knote %p is unexpectedly in flux", kn)); ("knote %p is unexpectedly in flux", kn));
@ -3231,6 +3292,9 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
knote_enter_flux(kn); knote_enter_flux(kn);
KN_FLUX_UNLOCK(kn); KN_FLUX_UNLOCK(kn);
KEVQ_UNLOCK(kevq); KEVQ_UNLOCK(kevq);
/* Now we dropped the kevq lock and kn is influx */
if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) { if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) {
/* TODO: we are waiting for another kqueue /* TODO: we are waiting for another kqueue
*/ */
@ -3239,40 +3303,66 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
knl = kn_list_lock(kn); knl = kn_list_lock(kn);
fevent = kn->kn_fop->f_event(kn, 0); fevent = kn->kn_fop->f_event(kn, 0);
/* return ALL knotes */ /* resched knotes that were processed last time - only for SKQ mode */
if (kn->kn_status & KN_WS) { if (KQSCHED_GET_SCHED(kq) && (kn->kn_status & KN_PROCESSING) == KN_PROCESSING) {
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN | KN_PROCESSING | KN_WS); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN | KN_PROCESSING | KN_WS);
if (fevent) { if (fevent) {
knote_activate(kn); knote_activate(kn);
} }
knote_leave_flux_ul(kn);
KEVQ_LOCK(kevq); KEVQ_LOCK(kevq);
*kncnt -= 1; *kncnt -= 1;
CTR3(KTR_KQ, "kqueue_scan: td %d, kevq %p resched processed knote %p", td->td_tid, kevq, kn);
knote_leave_flux_ul(kn);
kn_list_unlock(knl);
influx = 1;
continue;
}
if (count >= maxevents) {
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
kn->kn_status &= ~KN_SCAN;
KEVQ_LOCK(kevq);
/* don't continue processing events when we don't have enough space for it, re-insert at the end */
TAILQ_INSERT_TAIL(kntq, kn, kn_tqe);
CTR3(KTR_KQ, "kqueue_scan: td %d, kevq %p ignored overflow knote %p", td->td_tid, kevq, kn);
knote_leave_flux_ul(kn);
kn_list_unlock(knl); kn_list_unlock(knl);
influx = 1; influx = 1;
CTR3(KTR_KQ, "kqueue_scan: td %d, kevq %p returned stolen knote %p", td->td_tid, kevq, kn);
continue; continue;
} }
if (fevent == 0) { if (fevent == 0) {
KEVQ_LOCK(kevq);
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN | KN_PROCESSING | KN_WS); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN | KN_PROCESSING | KN_WS);
knote_leave_flux_ul(kn);
KEVQ_LOCK(kevq);
*kncnt -= 1; *kncnt -= 1;
CTR4(KTR_KQ, "kqueue_scan: kn %p, ident: %d not asserted anymore for kevq %p, td %d", kn, kn->kn_id, kevq, td->td_tid);
knote_leave_flux_ul(kn);
kn_list_unlock(knl); kn_list_unlock(knl);
influx = 1; influx = 1;
CTR4(KTR_KQ, "kqueue_scan: kn %p, ident: %d not valid anymore for kevq %p, td %d", kn, kn->kn_id, kevq, td->td_tid);
continue; continue;
} }
touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL);
if (touch) if (touch)
kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS);
else else {
CTR4(KTR_KQ, "kqueue_scan: td %d, kevq %p WRITTEN to KEVP knote %p, fflags: %d", td->td_tid, kevq, kn, kn->kn_fflags);
*kevp = kn->kn_kevent; *kevp = kn->kn_kevent;
}
KEVQ_LOCK(kevq); KEVQ_LOCK(kevq);
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
@ -3303,8 +3393,12 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
knote_leave_flux_ul(kn); knote_leave_flux_ul(kn);
kn_list_unlock(knl); kn_list_unlock(knl);
influx = 1; influx = 1;
}
CTR4(KTR_KQ, "kqueue_scan: td %d, kevq %p finished scanning knote %p, fflags: %d", td->td_tid, kevq, kn, kn->kn_fflags);
}
KASSERT(count < maxevents, ("count >= maxevents"));
/* we are returning a copy to the user */ /* we are returning a copy to the user */
kevp++; kevp++;
nkev++; nkev++;
@ -3367,7 +3461,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
if (kevq->kevq_avg_ev == 0) { if (kevq->kevq_avg_ev == 0) {
kevq->kevq_avg_ev = count; kevq->kevq_avg_ev = count;
} else { } else {
kevq->kevq_avg_ev = calc_overtime_avg(kevq->kevq_avg_ev, count, 80); kevq->kevq_avg_ev = calc_overtime_avg(kevq->kevq_avg_ev, count, 95);
} }
} }
@ -3377,9 +3471,34 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
error = k_ops->k_copyout(k_ops->arg, keva, nkev); error = k_ops->k_copyout(k_ops->arg, keva, nkev);
} }
td->td_retval[0] = count; td->td_retval[0] = count;
CTR2(KTR_KQ, "Kqueue_scan RETURNED for tid %d, total %d events!\n", td->td_tid, count);
return (error); return (error);
} }
static void
kqueue_dump(struct kqueue *kq, struct sbuf *buf)
{
sbuf_printf(buf, "<kq_dump ptr=\"0x%p\" sched=\"0x%x\" sargs=\"0x%x\" feat=\"0x%x\" fargs=\"0x%x\" rtshare=\"%d\" tfreq=\"%d\" >\n", kq,
kq->kq_ssched, kq->kq_ssargs, kq->kq_sfeat,
kq->kq_sfargs,kq->kq_rtshare, kq->kq_tfreq);
sbuf_printf(buf, "\n%*c<kevq_dump>\n", 1 * DUMP_INDENT, ' ');
KVLST_RLOCK(kq);
for(int i = 0; i < veclist_size(&kq->kevq_vlist); i++) {
kevq_dump(buf, veclist_at(&kq->kevq_vlist, i), 2);
}
KVLST_RUNLOCK(kq);
sbuf_printf(buf, "%*c</kevq_dump>\n", 1 * DUMP_INDENT, ' ');
/* dump kqdom if used */
if (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS) {
sbuf_printf(buf, "\n%*c<kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
kqdom_dump(buf, kq->kq_kqd, 2);
sbuf_printf(buf, "%*c</kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
}
sbuf_printf(buf, "\n</kq_dump>\n");
}
/*ARGSUSED*/ /*ARGSUSED*/
static int static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data, kqueue_ioctl(struct file *fp, u_long cmd, void *data,
@ -3483,33 +3602,16 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data,
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
break; break;
case FKQMPRNT: case FKQMPRNT:
rbuf = malloc(1024 * 1024 * sizeof(char), M_KQUEUE, M_WAITOK); rbuf = malloc(1024 * 1024 * sizeof(char), M_KQUEUE, M_WAITOK);
sbuf_new(&buf, rbuf, 1024 * 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_new(&buf, rbuf, 1024 * 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
if (kq->kq_flags & KQ_FLAG_MULTI) { if (kq->kq_flags & KQ_FLAG_MULTI) {
sbuf_printf(&buf, "<kq_dump ptr=\"0x%p\" sched=\"0x%x\" sargs=\"0x%x\" feat=\"0x%x\" fargs=\"0x%x\" rtshare=\"%d\" tfreq=\"%d\" >\n", kq, kqueue_dump(kq, &buf);
kq->kq_ssched, kq->kq_ssargs, kq->kq_sfeat,
kq->kq_sfargs,kq->kq_rtshare, kq->kq_tfreq);
sbuf_printf(&buf, "\n%*c<kevq_dump>\n", 1 * DUMP_INDENT, ' ');
KVLST_RLOCK(kq);
for(int i = 0; i < veclist_size(&kq->kevq_vlist); i++) {
kevq_dump(&buf, veclist_at(&kq->kevq_vlist, i), 2);
}
KVLST_RUNLOCK(kq);
sbuf_printf(&buf, "%*c</kevq_dump>\n", 1 * DUMP_INDENT, ' ');
/* dump kqdom if used */
if (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS) {
sbuf_printf(&buf, "\n%*c<kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
kqdom_dump(&buf, kq->kq_kqd, 2);
sbuf_printf(&buf, "%*c</kqdom_dump>\n", 1 * DUMP_INDENT, ' ');
}
sbuf_printf(&buf, "\n</kq_dump>\n");
} else { } else {
error = (EINVAL); error = (EINVAL);
break;
} }
sbuf_finish(&buf); sbuf_finish(&buf);
@ -3699,7 +3801,7 @@ kevq_drain(struct kevq *kevq, struct thread *td)
KEVQ_LOCK(kevq); KEVQ_LOCK(kevq);
} }
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
} }
KASSERT(kevq_total_knote(kevq) == 0, ("some knotes are left")); KASSERT(kevq_total_knote(kevq) == 0, ("some knotes are left"));
@ -3989,7 +4091,7 @@ knote(struct knlist *list, long hint, int lockflags)
if (kn_active) if (kn_active)
knote_activate(kn); knote_activate(kn);
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
} }
} }
if ((lockflags & KNF_LISTLOCKED) == 0) if ((lockflags & KNF_LISTLOCKED) == 0)
@ -4010,9 +4112,9 @@ knote_flux_wakeup(struct knote *kn)
{ {
KN_FLUX_OWNED(kn); KN_FLUX_OWNED(kn);
if (kn->kn_fluxwait) { if (kn->kn_fluxwait) {
CTR1(KTR_KQ, "waking up kn %p", kn); CTR1(KTR_KQ, "knote_flux_wakeup: waking up %p", kn);
kn->kn_fluxwait = 0; kn->kn_fluxwait = 0;
wakeup(kn); wakeup(kn);
} }
} }
@ -4354,7 +4456,9 @@ knote_fdclose(struct thread *td, int fd)
kn->kn_fluxwait = 1; kn->kn_fluxwait = 1;
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
CTR2(KTR_KQ, "knote_fdclose: thread %d waiting on knote %p", td->td_tid, kn);
msleep(kn, &kn->kn_fluxlock, PSOCK | PDROP, "kqflxwt4", 0); msleep(kn, &kn->kn_fluxlock, PSOCK | PDROP, "kqflxwt4", 0);
CTR2(KTR_KQ, "knote_fdclose: thread %d woke up from knote %p", td->td_tid, kn);
KQ_LOCK(kq); KQ_LOCK(kq);
goto again; goto again;
@ -4409,6 +4513,7 @@ knote_drop_detached(struct knote *kn, struct thread *td)
struct kevq *kevq; struct kevq *kevq;
struct klist *list; struct klist *list;
CTR2(KTR_KQ, "knote_drop_detached: td %d dropping knote %p", td->td_tid, kn);
kq = kn->kn_kq; kq = kn->kn_kq;
KASSERT((kn->kn_status & KN_DETACHED) != 0, KASSERT((kn->kn_status & KN_DETACHED) != 0,
@ -4435,7 +4540,7 @@ knote_drop_detached(struct knote *kn, struct thread *td)
KEVQ_UNLOCK(kevq); KEVQ_UNLOCK(kevq);
} }
KN_LEAVE_FLUX_WAKEUP(kn); knote_leave_flux_ul(kn);
KQ_UNLOCK(kq); KQ_UNLOCK(kq);
if (kn->kn_fop->f_isfd) { if (kn->kn_fop->f_isfd) {
@ -4448,35 +4553,54 @@ knote_drop_detached(struct knote *kn, struct thread *td)
} }
static struct kevq * static struct kevq *
kevq_vec_select_kevq(struct veclist *lst, int num_rand, u_long rand) kvlst_sel_dist_kevq(struct veclist *lst, u_long rand, struct kevq *kevq_to_skip)
{ {
struct kevq *ret = NULL;
int idx;
int sz; int sz;
struct kevq *cur_kevq = NULL, *next_kevq; sz = veclist_size(lst);
if (kevq_to_skip != NULL) {
sz--;
}
if (sz > 0) {
idx = rand % sz;
ret = veclist_at(lst, idx);
if (ret == kevq_to_skip) {
KASSERT(idx + 1 < veclist_size(lst), ("kvlist_sel_dist_kevq overflow"));
ret = veclist_at(lst, idx + 1);
}
}
return ret;
}
/*
* kevq_cmp_f: kevq1, kevq2. if kevq1 is more favorable than kevq2, return > 0
*/
static struct kevq *
kvlst_sel_kevq(struct veclist *lst, int num_rand, u_long rand, long (*kevq_cmp_f)(struct kevq*, struct kevq*), struct kevq *kevq_to_skip)
{
struct kevq *cur_kevq = NULL;
struct kevq *next_kevq;
/* XXX: hack */ /* XXX: hack */
KASSERT(num_rand <= 2, ("too large num_rand")); KASSERT(num_rand <= 2, ("too large num_rand"));
//CTR1(KTR_KQ, "kevq_vec_select_kevq: num - %d", num_rand); if (veclist_size(lst) > 0) {
sz = veclist_size(lst);
if (sz > 0) {
for (int i = 0; i < num_rand; i++) { for (int i = 0; i < num_rand; i++) {
next_kevq = veclist_at(lst, rand % sz); next_kevq = kvlst_sel_dist_kevq(lst, rand, kevq_to_skip);
//CTR5(KTR_KQ, "kevq_vec_select_kevq: candidate kevq %p [%d] avg lat %ld #kn %d Tot: %ld", next_kevq, i, next_kevq->kevq_avg_lat, next_kevq->kn_count, next_kevq->kevq_avg_lat * next_kevq->kn_count); if (cur_kevq == NULL || (next_kevq != NULL && kevq_cmp_f != NULL && kevq_cmp_f(next_kevq, cur_kevq) > 0)) {
if (cur_kevq == NULL || (next_kevq != NULL && kevq_lat_cmp(cur_kevq, next_kevq) > 0)) {
cur_kevq = next_kevq; cur_kevq = next_kevq;
kevq_to_skip = cur_kevq;
} }
/* XXX: hack, 256 queues max */ /* XXX: hack, 256 queues max */
rand = rand >> 8; rand = rand >> 8;
} }
} }
//CTR2(KTR_KQ, "kevq_vec_select_kevq: selected kevq %p Tot: %ld", cur_kevq, cur_kevq == NULL ? 0 : cur_kevq->kevq_avg_lat * cur_kevq->kn_count);
return cur_kevq; return cur_kevq;
} }
@ -4590,14 +4714,16 @@ knote_next_kevq(struct knote *kn)
KASSERT(kqdom_is_leaf(kqd), ("found kqdom not leaf")); KASSERT(kqdom_is_leaf(kqd), ("found kqdom not leaf"));
KQD_RLOCK(kqd); KQD_RLOCK(kqd);
next_kevq = kevq_vec_select_kevq(&kqd->kqd_kevqs, 1, kqueue_random(&kn->kn_rand_seed)); /* pick a random kevq */
next_kevq = kvlst_sel_kevq(&kqd->kqd_kevqs, 1, kqueue_random(&kn->kn_rand_seed), NULL, NULL);
if (sargs > 0) { if (sargs > 0) {
KVLST_RLOCK(kq); KVLST_RLOCK(kq);
other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed)); other_kevq = kvlst_sel_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed), kevq_lat_cmp, next_kevq);
if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(next_kevq, other_kevq, 90) > 0)) { if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(other_kevq, next_kevq) > 0)) {
next_kevq = other_kevq; next_kevq = other_kevq;
CTR1(KTR_KQ, "knote_next_kevq: [QUEUE%d] new selected kevq: %p", next_kevq);
} }
} }
@ -4619,7 +4745,7 @@ knote_next_kevq(struct knote *kn)
case KQ_SCHED_BEST: case KQ_SCHED_BEST:
KVLST_RLOCK(kq); KVLST_RLOCK(kq);
next_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed)); next_kevq = kvlst_sel_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed), kevq_lat_cmp, NULL);
next_kevq = kevq_lock_check_avail(next_kevq); next_kevq = kevq_lock_check_avail(next_kevq);
KVLST_RUNLOCK(kq); KVLST_RUNLOCK(kq);

View File

@ -56,6 +56,10 @@ struct kevq {
uint64_t kevq_avg_ev; uint64_t kevq_avg_ev;
uint64_t kevq_tot_ev; uint64_t kevq_tot_ev;
uint64_t kevq_tot_time; uint64_t kevq_tot_time;
/* the following two are only set when the thread is procssing in userspace
* in kernel they are set to special value KEVQ_LAST_KERN
*/
#define KEVQ_LAST_KERN (0)
uint64_t kevq_last_kev; uint64_t kevq_last_kev;
uint32_t kevq_last_nkev; uint32_t kevq_last_nkev;
#define KEVQ_SLEEP 0x01 #define KEVQ_SLEEP 0x01

View File

@ -16,7 +16,9 @@ SRCS.kqtest= \
proc.c \ proc.c \
signal.c \ signal.c \
read_m.c \ read_m.c \
user.c user.c \
close_m.c
WARNS?= 2 WARNS?= 2
LDADD+= -lthr LDADD+= -lthr
.include <bsd.test.mk> .include <bsd.test.mk>

View File

@ -0,0 +1,151 @@
#include "common.h"
#include "common_m.h"
#include <sys/event.h>
#include <sys/ioctl.h>
#include <semaphore.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <pthread_np.h>
#include <unistd.h>
#include <stdlib.h>
#define SOCK_CLOSE_PROB (30)
#define CLOSER_RAND_DELAY (17)
#define QUEUE_RAND_DELAY (7)
#define CLOSER_TOT_SOCK (8)
#define PACKET_CNT (CLOSER_TOT_SOCK * 1024)
#define THREAD_CNT (8)
static int close_socks[CLOSER_TOT_SOCK][2];
static volatile int close_stop;
static int g_kqfd;
static void *
socket_closer(void* args)
{
struct kevent kev;
while(!close_stop) {
int ran = rand() % CLOSER_TOT_SOCK;
printf("closed idx %d...\n", ran);
close(close_socks[ran][0]);
close(close_socks[ran][1]);
/* events are supposed to clean up themselves after fd invalidates */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, &close_socks[ran][0]) < 0) {
err(1, "kevent_socket");
}
EV_SET(&kev, close_socks[ran][0], EVFILT_READ, EV_ADD, 0, 0, &close_socks[ran][0]);
if (kevent(g_kqfd, &kev, 1, NULL, 0, NULL) == -1) {
err(1, "kevent_brutal_add");
}
usleep(rand() % CLOSER_RAND_DELAY);
}
return NULL;
}
static void *
socket_worker(void* args)
{
char dat;
struct kevent *ret;
while (1) {
ret = kevent_get(g_kqfd);
printf("processing packet...\n");
dat = socket_pop_igerr(ret->ident);
if (dat == 'e')
break;
free(ret);
}
return NULL;
}
void
test_socket_close(char* name)
{
pthread_t workers[THREAD_CNT];
pthread_t closer;
char id[256];
struct kevent kev;
const char *test_id = "[Multi]kevent(close) - ";
strcpy(id, test_id);
strcat(id, name);
test_begin(id);
close_stop = 0;
srand(time(NULL));
int flags = KQSCHED_MAKE(KQ_SCHED_CPU, 2, 0, 0);
g_kqfd = kqueue();
int error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) {
err(1, "ioctl");
}
for (int i = 0; i < CLOSER_TOT_SOCK; i++) {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, &close_socks[i][0]) < 0) {
err(1, "kevent_socket");
}
EV_SET(&kev, close_socks[i][0], EVFILT_READ, EV_ADD, 0, 0, &close_socks[i][0]);
if (kevent(g_kqfd, &kev, 1, NULL, 0, NULL) == -1) {
err(1, "kevent_brutal_add");
}
}
for (int i = 0; i < THREAD_CNT; i++) {
pthread_create(&workers[i], NULL, socket_worker, NULL);
}
pthread_create(&closer, NULL, socket_closer, NULL);
for(int i = 0; i < PACKET_CNT; i++) {
socket_push_igerr(close_socks[rand() % CLOSER_TOT_SOCK][1], '.');
usleep(rand() % QUEUE_RAND_DELAY);
}
printf("Stopping closer...\n");
close_stop = 1;
pthread_join(closer, NULL);
printf("Closer stopped!\n");
for (int i = 0; i < THREAD_CNT; i++) {
socket_push(close_socks[rand() % CLOSER_TOT_SOCK][1], 'e');
}
for (int i = 0; i < THREAD_CNT; i++) {
pthread_join(workers[i], NULL);
}
for (int i = 0; i < CLOSER_TOT_SOCK; i++) {
close(close_socks[i][0]);
close(close_socks[i][1]);
}
printf("Threads stopped!\n");
success();
}

View File

@ -0,0 +1,39 @@
#ifndef _COMMON_M_H
#define _COMMON_M_H
#include "common.h"
static inline char
socket_pop(int sockfd)
{
char buf;
if (read(sockfd, &buf, 1) < 1)
err(1, "read(2)");
return buf;
}
static inline char
socket_pop_igerr(int sockfd)
{
char buf = 0;
read(sockfd, &buf, 1);
return buf;
}
static inline void
socket_push(int sockfd, char ch)
{
if (write(sockfd, &ch, 1) < 1) {
err(1, "write(2)");
}
}
static inline void
socket_push_igerr(int sockfd, char ch)
{
write(sockfd, &ch, 1);
}
#endif

View File

@ -17,13 +17,17 @@
*/ */
#include "common.h" #include "common.h"
#include "common_m.h"
#include <sys/event.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <semaphore.h> #include <semaphore.h>
#include <pthread.h> #include <pthread.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <pthread_np.h> #include <pthread_np.h>
#include <unistd.h>
#include <stdlib.h>
//#define TEST_DEBUG //#define TEST_DEBUG
@ -42,7 +46,7 @@ struct thread_info {
* Read test * Read test
*/ */
#define THREAD_CNT (16) #define THREAD_CNT (8)
#define PACKET_CNT (1600) #define PACKET_CNT (1600)
static int g_kqfd; static int g_kqfd;
@ -67,35 +71,6 @@ dump_gkq()
} }
} }
static char
socket_pop(int sockfd)
{
char buf;
/* Drain the read buffer, then make sure there are no more events. */
#ifdef TEST_DEBUG
printf("READ_M: popping the read buffer of sock %d\n", sockfd);
#endif
if (read(sockfd, &buf, 1) < 1)
err(1, "read(2)");
return buf;
}
static void
socket_push(int sockfd, char ch)
{
#ifdef TEST_DEBUG
printf("READ_M: pushing to socket %d\n", sockfd);
#endif
if (write(sockfd, &ch, 1) < 1) {
#ifdef TEST_DEBUG
printf("READ_M: write failed with %d\n", errno);
#endif
err(1, "write(2)");
}
}
/*************************** /***************************
* Read test * Read test
***************************/ ***************************/
@ -634,24 +609,22 @@ test_socket_ws_timeout()
success(); success();
} }
/*************************** /***************************
* Brutal test * Brutal test
***************************/ ***************************/
#define THREAD_BRUTE_CNT (8) #define THREAD_BRUTE_CNT (8)
#define SOCK_BRUTE_CNT (512) #define SOCK_BRUTE_CNT (256)
#define PACKET_BRUTE_CNT (100 * (SOCK_BRUTE_CNT)) #define PACKET_BRUTE_CNT (256 * (SOCK_BRUTE_CNT))
#define THREAD_EXIT_PROB (50) #define THREAD_EXIT_PROB (50)
#define BRUTE_REALTIME_PROB (50) #define BRUTE_REALTIME_PROB (50)
#define BRUTE_MAX_FREQ (10000) #define BRUTE_MAX_FREQ (10000)
#define BRUTE_MIN_FREQ (1) #define BRUTE_MIN_FREQ (1)
#define RAND_SLEEP (29) #define RAND_SLEEP (13)
#define RAND_SEND_SLEEP (7) #define RAND_SEND_SLEEP (7)
static int brute_sockfd[SOCK_BRUTE_CNT][2];
int brute_sockfd[SOCK_BRUTE_CNT][2]; static struct thread_info brute_threadinfo[THREAD_BRUTE_CNT];
struct thread_info brute_threadinfo[THREAD_BRUTE_CNT];
static void* static void*
test_socket_brutal_worker(void* args) test_socket_brutal_worker(void* args)
@ -691,11 +664,12 @@ test_socket_brutal_worker(void* args)
#endif #endif
dat = socket_pop(ret->ident); dat = socket_pop(ret->ident);
free(ret);
if (dat == 'e') if (dat == 'e')
break; break;
free(ret);
info->evcnt++; info->evcnt++;
usleep(rand() % RAND_SLEEP); usleep(rand() % RAND_SLEEP);
@ -723,7 +697,6 @@ test_socket_brutal(char* name)
srand(time(NULL)); srand(time(NULL));
for (int i = 0; i < SOCK_BRUTE_CNT; i++) { for (int i = 0; i < SOCK_BRUTE_CNT; i++) {
/* Create a connected pair of full-duplex sockets for testing socket events */ /* Create a connected pair of full-duplex sockets for testing socket events */
@ -740,8 +713,6 @@ test_socket_brutal(char* name)
} }
} }
srand(time(NULL));
#ifdef TEST_DEBUG #ifdef TEST_DEBUG
printf("READ_M: creating %d threads...\n", THREAD_BRUTE_CNT); printf("READ_M: creating %d threads...\n", THREAD_BRUTE_CNT);
#endif #endif
@ -910,12 +881,18 @@ test_socket_realtime()
success(); success();
} }
extern void
test_socket_close(char* name);
void void
test_evfilt_read_m() test_evfilt_read_m()
{ {
int flags = 0; int flags = 0;
int error; int error;
/* close test */
//test_socket_close("default");
/* Default rand */ /* Default rand */
flags = 0; flags = 0;
g_kqfd = kqueue(); g_kqfd = kqueue();