Fix random race and hardcode CPU frequency

Replace the shared random() state with a per-kevq/per-knote Park-Miller
generator (each object carries its own seed, warmed up at allocation time),
and replace hz with a hardcoded 2.1 GHz cycle count when converting the
per-event latency estimate into an event limit.

parent 7f31542099
commit 28ef953b8c
@@ -170,7 +170,7 @@ static int kevq_acquire(struct kevq *kevq, int locked);
 static void kevq_worksteal(struct kevq *kevq);
 static void kevq_drain(struct kevq *kevq, struct thread *td);
 static void kevq_activate(struct kevq *kevq, struct thread *td);
-static struct kevq * kevq_vec_select_kevq(struct veclist *lst, int num_rand);
+static struct kevq * kevq_vec_select_kevq(struct veclist *lst, int num_rand, u_long rand);
 static struct knote * kevq_peek_knote(struct kevq *kevq);
 static inline void kevq_delete_knote(struct kevq *kevq, struct knote *kn);
 static void kevq_insert_knote(struct kevq *kevq, struct knote *kn);
@@ -250,7 +250,7 @@ static void kqdom_update_parents(struct kqdom *leaf, int direction);
 static void kqdom_insert(struct kqdom *kqd, struct kevq *kevq);
 static void kqdom_remove(struct kqdom *kqd, struct kevq *kevq);
 static void kqdom_destroy(struct kqdom *root);
-static struct kevq * kqdom_random_kevq_locked(struct kqdom *kqd);
+//static struct kevq * kqdom_random_kevq_locked(struct kqdom *kqd);
 static void kqdom_build_internal(struct kqdom *kqd_cur, struct cpu_group *cg_cur, int *kqd_id);
 static struct kqdom * kqdom_build(void);
 static struct kqdom * kqdom_find(struct kqdom *root, int cpuid);
@@ -418,10 +418,52 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
 #define KQTUNE_PARSE_ARGS(sf) (((sf) >> 16) & 0xFFFF)
 #define KQTUNE_PARSE_OBJ(sf) ((sf) & 0xFFFF)
 
+#define NSHUFF 50
+
+/*
+ * Pseudo-random number generator for perturbing the profiling clock,
+ * and whatever else we might use it for. The result is uniform on
+ * [0, 2^31 - 1].
+ */
+static u_long
+kqueue_random(u_long *seed)
+{
+    long x, hi, lo, t;
+
+    /*
+     * Compute x[n + 1] = (7^5 * x[n]) mod (2^31 - 1).
+     * From "Random number generators: good ones are hard to find",
+     * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+     * October 1988, p. 1195.
+     */
+    /* Can't be initialized with 0, so use another value. */
+    if ((x = *seed) == 0)
+        x = 123459876;
+    hi = x / 127773;
+    lo = x % 127773;
+    t = 16807 * lo - 2836 * hi;
+    if (t < 0)
+        t += 0x7fffffff;
+    *seed = t;
+    //CTR1(KTR_KQ, "kqueue_random: generated %ld", t);
+    return (t);
+}
+
+static void
+kqueue_srandom(u_long *field, u_long seed)
+{
+    int i;
+
+    *field = seed;
+    for (i = 0; i < NSHUFF; i++)
+        kqueue_random(field);
+}
+
 static inline long
 kevq_exp_lat(struct kevq *kevq)
 {
-    return kevq->kevq_avg_lat * (kevq_total_knote(kevq) + 1) + kevq->kevq_last_kev - get_cyclecount();
+    return kevq->kevq_avg_lat * (kevq_total_knote(kevq) + 1) + kevq->kevq_last_kev;
 }
 
 static inline long
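The generator added here is Park and Miller's "minimal standard" linear congruential generator, computed with Schrage's method so the multiplication never overflows a 32-bit signed value. The point of the change is that each kevq and knote now carries its own seed word, so drawing a number mutates private state instead of racing on the shared random() state; kqueue_srandom then discards NSHUFF outputs so seeds derived from nearby kernel addresses diverge before first use. A minimal userspace sketch of the same recurrence, illustrative only and not part of the commit:

    #include <stdio.h>

    #define NSHUFF 50    /* mirrors the kernel define above */

    /* Park-Miller: x[n + 1] = (7^5 * x[n]) mod (2^31 - 1), via Schrage's method. */
    static unsigned long
    pm_random(unsigned long *seed)
    {
        long x, hi, lo, t;

        if ((x = *seed) == 0)
            x = 123459876;             /* 0 is a fixed point of the recurrence */
        hi = x / 127773;               /* 127773 = (2^31 - 1) / 16807 */
        lo = x % 127773;
        t = 16807 * lo - 2836 * hi;    /* 2836 = (2^31 - 1) % 16807 */
        if (t < 0)
            t += 0x7fffffff;
        *seed = t;
        return (t);
    }

    int
    main(void)
    {
        /* The kernel seeds from the object's own address; any value works here. */
        unsigned long seed = 12345;

        /* Warm-up, as in kqueue_srandom: discard the first NSHUFF outputs
         * so similar seeds stop producing similar first draws. */
        for (int i = 0; i < NSHUFF; i++)
            pm_random(&seed);
        printf("first post-warm-up draw: %lu\n", pm_random(&seed));
        return (0);
    }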
@@ -2040,13 +2082,16 @@ kevq_init(struct kevq *kevq) {
     TAILQ_INIT(&kevq->kn_head);
     TAILQ_INIT(&kevq->kn_rt_head);
 
-    kevq->kn_marker.kn_status = KN_MARKER;
-    kevq->kn_marker_rt.kn_status = KN_MARKER;
-    kevq->kn_marker_rt.kn_flags = EV_REALTIME;
-    kevq->kn_marker.kn_kevq = kevq;
-    kevq->kn_marker_rt.kn_kevq = kevq;
-    mtx_init(&kevq->kn_marker.kn_fluxlock, "kn_fluxlock", NULL, MTX_DEF | MTX_DUPOK);
-    mtx_init(&kevq->kn_marker_rt.kn_fluxlock, "kn_fluxlock", NULL, MTX_DEF | MTX_DUPOK);
+    kevq->kn_marker = knote_alloc(M_WAITOK);
+    kevq->kn_marker_rt = knote_alloc(M_WAITOK);
+    kevq->kn_marker->kn_status = KN_MARKER;
+    kevq->kn_marker_rt->kn_status = KN_MARKER;
+    kevq->kn_marker_rt->kn_flags = EV_REALTIME;
+    kevq->kn_marker->kn_kevq = kevq;
+    kevq->kn_marker_rt->kn_kevq = kevq;
+
+    kqueue_srandom(&kevq->kevq_rand_seed, (u_long)kevq);
 }
 
 static void
@@ -2414,6 +2459,7 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
     sbuf_printf(buf, "%*c<kevq ptr=\"%p\" "
         "knotes=\"%d\" "
         "rt_knotes=\"%d\" "
+        "avg_rtlimit=\"%ld\" "
         "total_time=\"%ld\" "
         "total_syscall=\"%ld\" "
         "total_events=\"%ld\" "
@@ -2425,6 +2471,7 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
         "total_realtime=\"%ld\" "
         "total_sched=\"%ld\" />\n",
         level * DUMP_INDENT, ' ', kevq, kevq->kn_count, kevq->kn_rt_count,
+        kevq->kevq_avg_rlimit,
         kevq->kevq_tot_time,
         kevq->kevq_tot_syscall,
         kevq->kevq_tot_ev,
@@ -2747,7 +2794,7 @@ kevq_worksteal(struct kevq *kevq)
     KASSERT(tgt_count <= 8, ("too many kevq ws knotes"));
 
     KVLST_RLOCK(kq);
-    other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, 1);
+    other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, 1, kqueue_random(&kevq->kevq_rand_seed));
     /* fast fail */
     if (other_kevq != kevq && kevq_stealable(other_kevq)) {
         if (KEVQ_TRYLOCK(other_kevq)) {
@@ -2908,7 +2955,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
      * = (1 / kq->kq_tfreq) / (kevq->kevq_avg_lat / hz)
      * = (hz / (kevq->kevq_avg_lat * kq->kq_tfreq))
      */
-    evlimit = hz / (kevq->kevq_avg_lat * kq->kq_tfreq);
+    evlimit = (2100 * 1000 * 1000) / (kevq->kevq_avg_lat * kq->kq_tfreq);
 
     if (evlimit == 0) {
         evlimit = 1;
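For context on the hardcoded constant: on FreeBSD, hz is the timer-tick rate (typically 1000 per second), not the CPU frequency, while kevq_avg_lat appears to be measured in CPU cycles (this file stamps events with get_cyclecount()), so the original expression mixed units. 2100 * 1000 * 1000 is cycles per second on a 2.1 GHz machine, hence the "hardcode CPU HZ" in the commit title. A worked example with hypothetical numbers showing how the units cancel:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical figures, for illustration only. */
        uint64_t cpu_hz  = 2100ULL * 1000 * 1000; /* cycles per second, hardcoded 2.1 GHz */
        uint64_t avg_lat = 52500;                 /* avg cycles to deliver one event (~25 us) */
        uint64_t tfreq   = 1000;                  /* target kevent call frequency, per second */

        /*
         * events per call = (time budget per call) / (time per event)
         *                 = (cpu_hz / tfreq) / avg_lat
         *                 = cpu_hz / (avg_lat * tfreq)
         */
        uint64_t evlimit = cpu_hz / (avg_lat * tfreq);
        printf("evlimit = %ju events per kevent call\n", (uintmax_t)evlimit); /* prints 40 */
        return (0);
    }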
@@ -2932,7 +2979,11 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
      */
     rtlimit = (maxevents * kq->kq_rtshare + 99) / 100;
     KASSERT(rtlimit > 0, ("the math above is fundamentally broken"));
+
+    if (kevq->kevq_avg_rlimit == 0) {
+        kevq->kevq_avg_rlimit = rtlimit;
+    } else {
+        kevq->kevq_avg_rlimit = calc_overtime_avg(kevq->kevq_avg_rlimit, rtlimit, 80);
+    }
     rsbt = 0;
     if (tsp != NULL) {
         if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 ||
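The + 99 makes the division round up rather than truncate, which is what the KASSERT relies on: with any nonzero kq_rtshare and maxevents >= 1, the real-time share can never round down to zero. A standalone check of that ceiling-division identity (illustrative only):

    #include <assert.h>

    /* rtlimit = ceil(maxevents * rtshare / 100), in pure integer arithmetic. */
    static int
    rtlimit(int maxevents, int rtshare)
    {
        return (maxevents * rtshare + 99) / 100;
    }

    int
    main(void)
    {
        assert(rtlimit(1, 1) == 1);      /* 1% of 1 event rounds up, never 0 */
        assert(rtlimit(3, 33) == 1);     /* 0.99 rounds up to 1 */
        assert(rtlimit(256, 50) == 128); /* exact halves stay exact */
        return (0);
    }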
@@ -2966,8 +3017,8 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
     }
 
     if (kq->kq_flags & KQ_FLAG_MULTI) {
-        marker = &kevq->kn_marker;
-        rtmarker = &kevq->kn_marker_rt;
+        marker = kevq->kn_marker;
+        rtmarker = kevq->kn_marker_rt;
     } else {
         marker = knote_alloc(M_WAITOK);
         rtmarker = knote_alloc(M_WAITOK);
@@ -3188,7 +3239,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
     knl = kn_list_lock(kn);
     fevent = kn->kn_fop->f_event(kn, 0);
 
-    /* return stolen knotes */
+    /* return ALL knotes */
     if (kn->kn_status & KN_WS) {
         KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
         kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN | KN_PROCESSING | KN_WS);
@@ -3533,6 +3584,8 @@ kevq_destroy(struct kevq *kevq)
 {
     CTR1(KTR_KQ, "kevq_destroy for %p", kevq);
     mtx_destroy(&kevq->lock);
+    knote_free(kevq->kn_marker);
+    knote_free(kevq->kn_marker_rt);
     free(kevq, M_KQUEUE);
 }
 
@@ -4395,22 +4448,19 @@ knote_drop_detached(struct knote *kn, struct thread *td)
 }
 
 static struct kevq *
-kevq_vec_select_kevq(struct veclist *lst, int num_rand)
+kevq_vec_select_kevq(struct veclist *lst, int num_rand, u_long rand)
 {
     int sz;
     struct kevq *cur_kevq = NULL, *next_kevq;
-    u_long rand;
 
     /* XXX: hack */
-    KASSERT(num_rand < 8, ("too much num_rand"));
+    KASSERT(num_rand <= 2, ("too large num_rand"));
 
     //CTR1(KTR_KQ, "kevq_vec_select_kevq: num - %d", num_rand);
 
     sz = veclist_size(lst);
 
     if (sz > 0) {
-        rand = random();
-
         for (int i = 0; i < num_rand; i++) {
             next_kevq = veclist_at(lst, rand % sz);
 
@@ -4420,7 +4470,7 @@ kevq_vec_select_kevq(struct veclist *lst, int num_rand)
                cur_kevq = next_kevq;
            }
 
-           /* XXX: hack */
+           /* XXX: hack, 256 queues max */
            rand = rand >> 8;
        }
    }
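With this change kevq_vec_select_kevq no longer calls random() itself; the caller passes in a word drawn from its private seed, and each of the (at most two) candidate draws consumes a different 8-bit slice of that word via rand >> 8, which is why the new comment caps the design at 256 queues. Keeping the better of two random candidates is the classic power-of-two-choices load-balancing scheme. A hedged userspace sketch with hypothetical types; the kernel compares queues by expected latency through kevq_lat_wcmp/kevq_exp_lat, simplified here to a plain field:

    #include <stddef.h>

    struct queue { long exp_lat; }; /* stand-in for struct kevq */

    /*
     * Pick the best of num_rand randomly chosen queues ("power of two
     * choices"). Each draw uses the low 8 bits of the caller-supplied
     * random word, then shifts them away -- hence at most 256 queues.
     */
    static struct queue *
    select_two_choices(struct queue **list, int sz, int num_rand, unsigned long rand)
    {
        struct queue *cur = NULL, *next;

        if (sz <= 0)
            return (NULL);
        for (int i = 0; i < num_rand; i++) {
            next = list[rand % sz];
            if (cur == NULL || next->exp_lat < cur->exp_lat)
                cur = next; /* keep the less-loaded candidate */
            rand >>= 8;     /* fresh low bits for the next draw */
        }
        return (cur);
    }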
@@ -4430,51 +4480,51 @@ kevq_vec_select_kevq(struct veclist *lst, int num_rand)
     return cur_kevq;
 }
 
-static struct kevq *
-kqdom_random_kevq_locked(struct kqdom *kqd)
-{
-    struct kevq *kevq;
-    struct kqdom *tkqd;
-    int num_active;
-    u_long rand;
+// static struct kevq *
+// kqdom_random_kevq_locked(struct kqdom *kqd)
+// {
+//     struct kevq *kevq;
+//     struct kqdom *tkqd;
+//     int num_active;
+//     u_long rand;
 
-    rand = random();
-    kevq = NULL;
+//     rand = random();
+//     kevq = NULL;
 
-    while (!kqdom_is_leaf(kqd)) {
-        KQD_RLOCK(kqd);
-        /* we only select active stuff inside this, need to be EXTREMELY fast */
-        num_active = veclist_size(&kqd->kqd_activelist);
-        CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected leaf kqdom %d", kqd->id);
-        if (num_active > 0) {
-            tkqd = veclist_at(&kqd->kqd_activelist, rand % num_active);
-        } else {
-            tkqd = NULL;
-        }
-        KQD_RUNLOCK(kqd);
-        kqd = tkqd;
-        /* XXX: hack */
-        rand = rand >> 8;
-    }
+//     while (!kqdom_is_leaf(kqd)) {
+//         KQD_RLOCK(kqd);
+//         /* we only select active stuff inside this, need to be EXTREMELY fast */
+//         num_active = veclist_size(&kqd->kqd_activelist);
+//         CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected leaf kqdom %d", kqd->id);
+//         if (num_active > 0) {
+//             tkqd = veclist_at(&kqd->kqd_activelist, rand % num_active);
+//         } else {
+//             tkqd = NULL;
+//         }
+//         KQD_RUNLOCK(kqd);
+//         kqd = tkqd;
+//         /* XXX: hack */
+//         rand = rand >> 8;
+//     }
 
-    if (kqd != NULL) {
-        CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected leaf kqdom %d", kqd->id);
-        KQD_RLOCK(kqd);
+//     if (kqd != NULL) {
+//         CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected leaf kqdom %d", kqd->id);
+//         KQD_RLOCK(kqd);
 
-        kevq = kevq_vec_select_kevq(&kqd->kqd_kevqs, 1);
-        kevq = kevq_lock_check_avail(kevq);
+//         kevq = kevq_vec_select_kevq(&kqd->kqd_kevqs, 1);
+//         kevq = kevq_lock_check_avail(kevq);
 
-        KQD_RUNLOCK(kqd);
-    }
+//         KQD_RUNLOCK(kqd);
+//     }
 
-    if (kevq != NULL) {
-        KEVQ_OWNED(kevq);
-    }
+//     if (kevq != NULL) {
+//         KEVQ_OWNED(kevq);
+//     }
 
-    CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected kevq %p", kevq);
+//     CTR1(KTR_KQ, "kqdom_random_kevq_locked: randomly selected kevq %p", kevq);
 
-    return kevq;
-}
+//     return kevq;
+// }
 
 
 /* select the next kevq based on knote and scheduler flags and locks the returned kevq */
@@ -4540,11 +4590,11 @@ knote_next_kevq(struct knote *kn)
     KASSERT(kqdom_is_leaf(kqd), ("found kqdom not leaf"));
 
     KQD_RLOCK(kqd);
-    next_kevq = kevq_vec_select_kevq(&kqd->kqd_kevqs, 1);
+    next_kevq = kevq_vec_select_kevq(&kqd->kqd_kevqs, 1, kqueue_random(&kn->kn_rand_seed));
 
     if (sargs > 0) {
         KVLST_RLOCK(kq);
-        other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs);
+        other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed));
 
         if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(next_kevq, other_kevq, 90) > 0)) {
             next_kevq = other_kevq;
@@ -4569,7 +4619,7 @@ knote_next_kevq(struct knote *kn)
     case KQ_SCHED_BEST:
         KVLST_RLOCK(kq);
 
-        next_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs);
+        next_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs, kqueue_random(&kn->kn_rand_seed));
         next_kevq = kevq_lock_check_avail(next_kevq);
 
         KVLST_RUNLOCK(kq);
@@ -4582,7 +4632,7 @@ knote_next_kevq(struct knote *kn)
 
     /* fall-back rand robbin*/
     if (next_kevq == NULL) {
-        rand = random();
+        rand = kqueue_random(&kn->kn_rand_seed);
 
         KVLST_RLOCK(kq);
         sz = veclist_size(&kq->kevq_vlist);
@@ -4782,6 +4832,7 @@ knote_alloc(int mflag)
     struct knote *ret = uma_zalloc(knote_zone, mflag | M_ZERO);
     /* CTR1(KTR_KQ, "knote_alloc: allocating knote %p", ret); */
     mtx_init(&ret->kn_fluxlock, "kn_fluxlock", NULL, MTX_DEF | MTX_DUPOK);
+    kqueue_srandom(&ret->kn_rand_seed, (u_long)ret);
     return ret;
 }
 
@@ -313,6 +313,7 @@ struct knote {
 #define KN_WS 0x100 /* the knote is stolen from another kevq */
     int kn_fluxwait;
     int kn_influx;
+    u_long kn_rand_seed;
     struct mtx kn_fluxlock;
     int kn_sfflags;  /* saved filter flags */
     int64_t kn_sdata;  /* saved data field */
@@ -51,11 +51,11 @@
 struct kevq {
-    /* 1st cacheline */
+    /* Sched stats */
+    u_long kevq_rand_seed;
     uint64_t kevq_avg_lat;
     uint64_t kevq_avg_ev;
     uint64_t kevq_tot_ev;
     uint64_t kevq_tot_time;
-    uint64_t kevq_tot_syscall;
     uint64_t kevq_last_kev;
     uint32_t kevq_last_nkev;
 #define KEVQ_SLEEP 0x01
@@ -65,14 +65,7 @@ struct kevq {
     int kevq_state;
     int kn_count;    /* number of pending knotes */
     int kn_rt_count; /* number of runtime knotes */
-
-    /* 2nd cacheline */
-    uint64_t kevq_tot_ws;
-    /* TODO: maybe these should be in kqdomain or global */
-    uint64_t kevq_tot_fallback;
-    uint64_t kevq_tot_kqd_mismatch;
-    uint64_t kevq_tot_sched;
-    uint64_t kevq_tot_realtime;
+    /* end 1st cache line */
 
     LIST_ENTRY(kevq) kevq_th_e; /* entry into kevq_thred's hashtable */
     LIST_ENTRY(kevq) kq_e;      /* entry into kq */
@@ -83,10 +76,19 @@ struct kevq {
     struct kevq_thred *kevq_th; /* the thread that the kevq belongs to */
     struct mtx lock;            /* the lock for the kevq */
     struct ktailq kn_head;      /* list of pending knotes */
-    struct knote kn_marker;
+    struct knote *kn_marker;
     struct ktailq kn_rt_head;   /* list of pending knotes with runtime priority */
-    struct knote kn_marker_rt;
+    struct knote *kn_marker_rt;
     int kevq_refcnt;
 
+    /* TODO: maybe these should be in kqdomain or global */
+    uint64_t kevq_tot_fallback;
+    uint64_t kevq_tot_kqd_mismatch;
+    uint64_t kevq_tot_sched;
+    uint64_t kevq_tot_realtime;
+    uint64_t kevq_tot_syscall;
+    uint64_t kevq_tot_ws;
+    uint64_t kevq_avg_rlimit;
 };
 
 /* TODO: assumed that threads don't get rescheduled across cores */
@@ -928,6 +928,18 @@ test_evfilt_read_m()
     test_socket_brutal("rand");
     close(g_kqfd);
 
+    /* BO2 */
+    flags = KQSCHED_MAKE(KQ_SCHED_BEST,2,0,0);
+    g_kqfd = kqueue();
+    error = ioctl(g_kqfd, FKQMULTI, &flags);
+    if (error == -1) {
+        err(1, "ioctl");
+    }
+
+    test_socket_read(1);
+    test_socket_brutal("best2");
+    close(g_kqfd);
+
     /* Queue + bo0 */
     flags = KQSCHED_MAKE(KQ_SCHED_QUEUE,0,0,0);
     g_kqfd = kqueue();
@@ -981,18 +993,6 @@ test_evfilt_read_m()
     test_socket_brutal("cpu2");
     close(g_kqfd);
 
-    /* BO2 */
-    flags = KQSCHED_MAKE(KQ_SCHED_BEST,2,0,0);
-    g_kqfd = kqueue();
-    error = ioctl(g_kqfd, FKQMULTI, &flags);
-    if (error == -1) {
-        err(1, "ioctl");
-    }
-
-    test_socket_read(1);
-    test_socket_brutal("best2");
-    close(g_kqfd);
-
     /* WS */
     flags = KQSCHED_MAKE(0,0,KQ_SCHED_FEAT_WS,1);
     g_kqfd = kqueue();