diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 64776b27b32e..caf834f422d1 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -89,8 +89,8 @@ __FBSDID("$FreeBSD$");
 static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
 #define KQDOM_FLAGS (KQ_SCHED_CPU | KQ_SCHED_QUEUE)
-#define KEVQ_LAT_FLAGS (KQ_SCHED_CPU | KQ_SCHED_QUEUE | KQ_SCHED_BEST)
-
+#define KEVQ_LAT_FLAGS ((uint64_t)-1) /* was: (KQ_SCHED_CPU | KQ_SCHED_QUEUE | KQ_SCHED_BEST) */
+#define DUMP_INDENT (4)
 /*
  * This lock is used if multiple kq locks are required. This possibly
  * should be made into a per proc lock.
@@ -112,11 +112,13 @@ TASKQUEUE_DEFINE_THREAD(kqueue_ctx);
 // TODO: only use it in SMP
 extern struct cpu_group *cpu_top;
 
-#define AVG_WEIGHT_FACTOR_OLD (4)
-#define AVG_WEIGHT_FACTOR_NEW (1)
-/* no need to handle overflow as long as the existing org/cur doesn't overflow */
-#define CALC_OVERTIME_AVG(prev, cur) (((prev) * AVG_WEIGHT_FACTOR_OLD + (cur) * AVG_WEIGHT_FACTOR_NEW) / (AVG_WEIGHT_FACTOR_OLD + AVG_WEIGHT_FACTOR_NEW))
+static inline uint64_t
+calc_overtime_avg(uint64_t prev, uint64_t cur, uint32_t prev_pct)
+{
+	KASSERT(prev_pct < 100, ("overtime average prev_pct >= 100"));
+	return (prev * prev_pct + cur * (100 - prev_pct)) / 100;
+}
 
 static struct kevq * kevqlist_find(struct kevqlist *kevq_list, struct kqueue *kq);
 static void kevq_thred_init(struct kevq_thred *kevq_th);
@@ -127,7 +129,7 @@ static void kevq_release(struct kevq* kevq, int locked);
 static void kevq_destroy(struct kevq *kevq);
 static int kevq_acquire(struct kevq *kevq, int locked);
 static void kevq_worksteal(struct kevq *kevq);
-void kevq_drain(struct kevq *kevq, struct thread *td);
+static void kevq_drain(struct kevq *kevq, struct thread *td);
 static void kevq_activate(struct kevq *kevq, struct thread *td);
 
@@ -1474,13 +1476,14 @@ kqueue_kevent(struct kqueue *kq, struct kevq *kevq, struct thread *td, int nchan
 
 		CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (delta) %ld ns", td->td_tid, kevq->kevq_last_nkev, cur_ts);
 
-		/* divide by the number of events processed */
-		avg = cur_ts / kevq->kevq_last_nkev;
+		/* update total time */
+		kevq->kevq_tot_time += cur_ts;
+		/* update average latency */
+		avg = cur_ts / kevq->kevq_last_nkev;
 
 		CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (avg) %ld ns", td->td_tid, kevq->kevq_last_nkev, avg);
 
 		if (kevq->kevq_avg_lat != 0) {
-			kevq->kevq_avg_lat = CALC_OVERTIME_AVG(kevq->kevq_avg_lat, avg);
+			kevq->kevq_avg_lat = calc_overtime_avg(kevq->kevq_avg_lat, avg, 80);
 		} else {
 			kevq->kevq_avg_lat = avg;
 		}
@@ -2339,7 +2342,26 @@ kqdom_destroy(struct kqdom *root)
 static void
 kevq_dump(struct kevq *kevq, int level, struct sbuf *buf)
 {
-	sbuf_printf(buf, "%*c<kevq: %p knotes: %d avg_lat: %lu>\n", level * 2, ' ', kevq, kevq->kn_count, kevq->kevq_avg_lat);
+	int error;
+
+	error = sbuf_printf(buf, "%*c<kevq: %p knotes: %d "
+	    "total_time: %lu "
+	    "total_syscalls: %lu "
+	    "total_events: %lu "
+	    "avg_latency: %lu "
+	    "avg_events: %lu "
+	    "total_fallbacks: %lu "
+	    "total_kqd_mismatches: %lu>\n",
+	    level * DUMP_INDENT, ' ', kevq, kevq->kn_count,
+	    kevq->kevq_tot_time,
+	    kevq->kevq_tot_syscall,
+	    kevq->kevq_tot_ev,
+	    kevq->kevq_avg_lat,
+	    kevq->kevq_avg_ev,
+	    kevq->kevq_tot_fallback,
+	    kevq->kevq_tot_kqd_mismatch);
+
+	KASSERT(error == 0, ("error writing sbuf"));
 }
 
 static void
@@ -2347,10 +2369,8 @@ kqdom_dump(struct kqdom *kqd, int level, struct sbuf *buf)
 {
 	/* XXX: No potential race between this and kqdom_build() for now.
	 * If we move kqdom_build() out of kqueue() syscall then there is a potential race */
-
-
-	/* XXX2: check return code */
-	sbuf_printf(buf, "%*c<kqdom id: %d level: %d cpu_mask: 0x%lx #children: %d #active: %d leaf: %d #kevq: %d>\n", level * 2, ' ',
+	int error;
+	error = sbuf_printf(buf, "%*c<kqdom id: %d level: %d cpu_mask: 0x%lx #children: %d #active: %d leaf: %d #kevq: %d>\n", level * DUMP_INDENT, ' ',
 	    kqd->id,
 	    level,
 	    kqd->cpu_mask.__bits[0],
@@ -2358,11 +2378,12 @@
 	    veclist_size(&kqd->kqd_activelist),
 	    kqdom_is_leaf(kqd),
 	    veclist_size(&kqd->kqd_kevqs));
+	KASSERT(error == 0, ("error writing sbuf"));
+
 	if (kqdom_is_leaf(kqd)) {
 		KQD_RLOCK(kqd);
 		/* print all kevqs */
 		for (int i = 0; i < veclist_size(&kqd->kqd_kevqs); i++) {
-			/* XXX: check return code */
 			kevq_dump(veclist_at(&kqd->kqd_kevqs, i), level + 1, buf);
 		}
 		KQD_RUNLOCK(kqd);
@@ -2371,9 +2392,8 @@
 			kqdom_dump(veclist_at(&kqd->children, i), level + 1, buf);
 		}
 	}
-
-	/* XXX: check return code */
-	sbuf_printf(buf, "%*c</kqdom>\n", level * 2, ' ');
+	error = sbuf_printf(buf, "%*c</kqdom>\n", level * DUMP_INDENT, ' ');
+	KASSERT(error == 0, ("error writing sbuf"));
 }
 
@@ -2440,7 +2460,7 @@ kqdom_update_lat(struct kqdom *leaf, uint64_t avg)
 	while(leaf != NULL) {
 		if (leaf->avg_lat != 0) {
 			// bit rot race here?
-			leaf->avg_lat = CALC_OVERTIME_AVG(leaf->avg_lat, avg);
+			leaf->avg_lat = calc_overtime_avg(leaf->avg_lat, avg, 80);
 		} else {
 			leaf->avg_lat = avg;
 		}
@@ -3005,11 +3025,20 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
 
 	KEVQ_UNLOCK(kevq);
 
-	if (nkev != 0 && (KQSCHED_GET_SCHED(kq) & KEVQ_LAT_FLAGS)) {
+	if (nkev > 0 && (KQSCHED_GET_SCHED(kq) & KEVQ_LAT_FLAGS)) {
 		/* book keep the statistics */
 		kevq->kevq_last_kev = get_cyclecount();
 		kevq->kevq_last_nkev = nkev;
 		CTR3(KTR_KQ, "kevent: td %d nkev %d kevent (exit) %ld ns", td->td_tid, kevq->kevq_last_nkev, kevq->kevq_last_kev);
+
+		/* update total events */
+		kevq->kevq_tot_ev += nkev;
+		kevq->kevq_tot_syscall++;
+		if (kevq->kevq_avg_ev == 0) {
+			kevq->kevq_avg_ev = nkev;
+		} else {
+			kevq->kevq_avg_ev = calc_overtime_avg(kevq->kevq_avg_ev, nkev, 80);
+		}
 	}
 
 	CTR2(KTR_KQ, "kqueue_scan: knote_free marker %p td %d", marker, td->td_tid);
@@ -3099,11 +3128,11 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data,
 	if (kq->kq_flags & KQ_FLAG_MULTI) {
 		/* 4MB buffer */
 		dat = malloc(sizeof(char) * 4096 * 1024, M_KQUEUE, M_WAITOK | M_ZERO);
-		sbuf_new(&buf, dat, 4096 * 1024 * sizeof(char), SBUF_AUTOEXTEND | SBUF_INCLUDENUL);
+		sbuf_new(&buf, dat, 4096 * 1024 * sizeof(char), SBUF_INCLUDENUL);
 
 		/* dump kvlst */
-		/* XXX: check return status of sbuf_printf */
-		sbuf_printf(&buf, "KQ 0x%p dump:\n\nActive KEVQ:\n", kq);
+		error = sbuf_printf(&buf, "KQ 0x%p dump:\n\nActive KEVQ:\n", kq);
+		KASSERT(error == 0, ("error writing sbuf"));
 		KVLST_RLOCK(kq);
 		for(int i = 0; i < veclist_size(&kq->kevq_vlist); i++) {
 			kevq_dump(veclist_at(&kq->kevq_vlist, i), 0, &buf);
@@ -3112,11 +3141,13 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data,
 
 		/* dump kqdom if used */
 		if (KQSCHED_GET_SCHED(kq) & KQDOM_FLAGS) {
-			sbuf_printf(&buf, "\nKQDOM:\n");
+			error = sbuf_printf(&buf, "\nKQDOM:\n");
+			KASSERT(error == 0, ("error writing sbuf"));
 			kqdom_dump(kq->kq_kqd, 0, &buf);
 		}
 
-		sbuf_finish(&buf);
+		error = sbuf_finish(&buf);
+		KASSERT(error == 0, ("error sbuf_finish"));
 		uprintf("%s", sbuf_data(&buf));
 		sbuf_delete(&buf);
 		free(dat, M_KQUEUE);
@@ -4141,7 +4172,9 @@ knote_next_kevq(struct knote *kn)
 	int sargs;
 	int sched;
 	int rand, sz;
+	int kqd_mismatch;
 
+	kqd_mismatch = 0;
 	next_kevq = NULL;
 	kq = kn->kn_kq;
 	sargs = KQSCHED_GET_SARGS(kq);
@@ -4180,10 +4213,14 @@ knote_next_kevq(struct knote *kn)
 			kn->kn_kqd = kqdom_find(kq->kq_kqd, PCPU_GET(cpuid));
 			KASSERT(kn->kn_kqd != NULL, ("knote scheduled on an unidentified CPU"));
 
-			CTR3(KTR_KQ, "knote_next_kevq: [QUEUE%d] knote %p attached to kqdom id %d", sargs, kn, kn->kn_kqd->id);
+			CTR4(KTR_KQ, "knote_next_kevq: [QUEUE%d] knote %p attached to kqdom id %d cpuset 0x%lx", sargs, kn, kn->kn_kqd->id, kn->kn_kqd->cpu_mask.__bits[0]);
 		}
 		kqd = kn->kn_kqd;
-
+
+		/* check whether the knote's interrupt fired on a CPU other than the one recorded for it */
+		if (!CPU_ISSET(PCPU_GET(cpuid), &kqd->cpu_mask)) {
+			kqd_mismatch = 1;
+		}
 done_cq:
 		KASSERT(kqdom_is_leaf(kqd), ("found kqdom not leaf"));
 
@@ -4203,6 +4240,10 @@ knote_next_kevq(struct knote *kn)
 		next_kevq = kevq_lock_check_avail(next_kevq);
 		KQD_RUNLOCK(kqd);
 
+		if (kqd_mismatch && next_kevq != NULL) {
+			next_kevq->kevq_tot_kqd_mismatch++;
+		}
+
 		CTR3(KTR_KQ, "knote_next_kevq: [QUEUE/CPU%d] next kevq %p for kn %p", sargs, next_kevq, kn);
 		break;
@@ -4232,6 +4273,7 @@ knote_next_kevq(struct knote *kn)
 		next_kevq = kevq_lock_check_avail(next_kevq);
 
 		if (next_kevq != NULL) {
+			next_kevq->kevq_tot_fallback++;
 			break;
 		}
diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h
index 694892b9039c..3aaa8c67110f 100644
--- a/sys/sys/eventvar.h
+++ b/sys/sys/eventvar.h
@@ -65,10 +65,18 @@ struct kevq {
 	int		kevq_state;
 	int		kevq_refcnt;
 
-	/* Used by the scheduler */
-	uint64_t	kevq_avg_lat;
 	uint64_t	kevq_last_kev;
 	uint64_t	kevq_last_nkev;
+	/* Sched stats */
+	uint64_t	kevq_avg_lat;
+	uint64_t	kevq_avg_ev;
+	uint64_t	kevq_tot_ev;
+	uint64_t	kevq_tot_time;
+	uint64_t	kevq_tot_syscall;
+
+	/* TODO: maybe these should be in kqdomain or global */
+	uint64_t	kevq_tot_fallback;
+	uint64_t	kevq_tot_kqd_mismatch;
 };
 
 /* TODO: assumed that threads don't get rescheduled across cores */
diff --git a/tests/sys/kqueue/libkqueue/read_m.c b/tests/sys/kqueue/libkqueue/read_m.c
index 6fc4822c9eee..c50792f18a5a 100644
--- a/tests/sys/kqueue/libkqueue/read_m.c
+++ b/tests/sys/kqueue/libkqueue/read_m.c
@@ -274,7 +274,7 @@ test_socket_queue(void)
 {
 	int error = 0;
 	const char *test_id = "[Multi][Queue]kevent(EVFILT_READ)";
-	
+
 	test_begin(test_id);
 
 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, &g_sockfd[0]) < 0)
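
Note on the statistics math: with KEVQ_LAT_FLAGS widened to ((uint64_t)-1), the
(KQSCHED_GET_SCHED(kq) & KEVQ_LAT_FLAGS) test passes for any non-zero scheduler
mode, so the per-kevq counters are now effectively always maintained. The
calc_overtime_avg() helper that replaces the old CALC_OVERTIME_AVG macro is a
fixed-point exponentially weighted moving average. Below is a minimal userspace
sketch of the same arithmetic; calc_overtime_avg() mirrors the patch, while the
main() harness and its sample values are purely illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Fixed-point EWMA: the previous average keeps prev_pct percent of the
 * weight and the new sample gets the remaining (100 - prev_pct) percent.
 * The patch calls this with prev_pct = 80 for kevq_avg_lat, kevq_avg_ev
 * and the per-kqdom avg_lat.
 */
static inline uint64_t
calc_overtime_avg(uint64_t prev, uint64_t cur, uint32_t prev_pct)
{
	assert(prev_pct < 100);
	/* Overflows only once prev or cur nears UINT64_MAX / 100. */
	return (prev * prev_pct + cur * (100 - prev_pct)) / 100;
}

int
main(void)
{
	/* Hypothetical per-syscall latency samples, in nanoseconds. */
	uint64_t samples[] = { 1000, 1200, 900, 5000, 1100 };
	uint64_t avg = 0;

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* A zero average is seeded with the first sample, as in kqueue_kevent(). */
		avg = (avg == 0) ? samples[i] :
		    calc_overtime_avg(avg, samples[i], 80);
		printf("sample %llu -> avg %llu\n",
		    (unsigned long long)samples[i], (unsigned long long)avg);
	}
	return (0);
}

The 80/20 blend is the same ratio as the old 4:1 macro weights: a single
outlier such as the 5000 ns sample above moves the average by only a fifth of
its distance, so spikes decay geometrically over subsequent samples.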