diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index d4a111c28b08..e1e3d68b0339 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -451,6 +451,7 @@ kevq_avail(struct kevq *kevq) static inline struct kevq * kevq_lock_check_avail(struct kevq *next_kevq) { + CTR1(KTR_KQ, "kevq_lock_check_avail: kevq %p", next_kevq); if (next_kevq != NULL) { KEVQ_NOTOWNED(next_kevq); KEVQ_LOCK(next_kevq); @@ -2420,7 +2421,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level) "avg_events=\"%ld\" " "total_fallbacks=\"%ld\" " "total_mismatches=\"%ld\" " - "total_worksteal=\"%ld\" />\n", + "total_worksteal=\"%ld\" " + "total_realtime=\"%ld\" " + "total_sched=\"%ld\" />\n", level * DUMP_INDENT, ' ', kevq, kevq->kn_count, kevq->kn_rt_count, kevq->kevq_tot_time, kevq->kevq_tot_syscall, @@ -2429,7 +2432,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level) kevq->kevq_avg_ev, kevq->kevq_tot_fallback, kevq->kevq_tot_kqd_mismatch, - kevq->kevq_tot_ws); + kevq->kevq_tot_ws, + kevq->kevq_tot_realtime, + kevq->kevq_tot_sched); } static void @@ -2739,7 +2744,7 @@ kevq_worksteal(struct kevq *kevq) tgt_count = KQSCHED_GET_FARGS(kq); /* XXX: hack */ - KASSERT(tgt_count < 8, ("too many kevq ws knotes")); + KASSERT(tgt_count <= 8, ("too many kevq ws knotes")); KVLST_RLOCK(kq); other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, 1); @@ -2765,7 +2770,6 @@ kevq_worksteal(struct kevq *kevq) /* steal from the first because it arrived late */ ws_kn = kevq_peek_knote(other_kevq); - /* TODO: maybe limit the number of knotes to go through */ while((ws_count < tgt_count) && (ws_kn != NULL)) { /* fast fail */ @@ -2918,8 +2922,16 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, /* adjust rtlimit according to the target share * = ceil(maxevents * kq->kq_rtshare%) */ + + /* XXX: actually rtlimit can be 0 but we don't allow it yet/forever? 
+ * the current implementation has an issue when only runtime events are present and rtlimit = 0 + * since kevq_total_knotes returns > 0, but rtlimit = 0 so we don't dequeue any runtime event + * the function will be trapped infinitely in (wakeup because tot_ev > 0 -> dequeue normal marker -> count = 0 -> retry -> wakeup because tot ev > 0) + * We simply don't allow users to set rtshare to 0 so we at least hand back one rt event, otherwise the solution might be very complicated + * because it involves sleep waiting on different queues as rtshare changes, AND in RUNTIME too? Not worth it really. + */ rtlimit = (maxevents * kq->kq_rtshare + 99) / 100; - KASSERT(rtlimit >= 0, ("the math above is fundamentally broken")); + KASSERT(rtlimit > 0, ("the math above is fundamentally broken")); rsbt = 0; if (tsp != NULL) { @@ -2976,7 +2988,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, KEVQ_OWNED(kevq); kevp = keva; - CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events", td->td_tid, kevq, kevq_total_knote(kevq)); + CTR4(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events, max_ev %d", td->td_tid, kevq, kevq_total_knote(kevq), maxevents); if (kevq_total_knote(kevq) == 0) { if (fsbt == -1) { @@ -3048,7 +3060,9 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, } KEVQ_OWNED(kevq); - + // if (kevq_total_knote(kevq) > 0) { + // KASSERT(!(TAILQ_FIRST(&kevq->kn_rt_head) == NULL && TAILQ_FIRST(&kevq->kn_head) == NULL), ("NULL > 0?")); + // } /* quick check */ if (curr < rtlimit) { rdrained = 0; @@ -3105,7 +3119,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, /* Now we have exclusive access to kn */ TAILQ_REMOVE(kntq, kn, kn_tqe); - CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p", td->td_tid, kevq, kn); + CTR4(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p, curr %d", td->td_tid, kevq, kn, curr); if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { kn->kn_status 
&= ~(KN_QUEUED | KN_PROCESSING | KN_WS); *kncnt -= 1; @@ -3117,7 +3131,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, /* We are dequeuing our marker, wakeup threads waiting on it */ knote_flux_wakeup(kn); KN_FLUX_UNLOCK(kn); - CTR2(KTR_KQ, "kqueue_scan: td %d MARKER WAKEUP %p", td->td_tid, kn); + CTR3(KTR_KQ, "kqueue_scan: td %d MARKER WAKEUP %p PRI %d", td->td_tid, kn, pri); if (kn == rtmarker) { rdrained = 1; @@ -3247,6 +3261,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops, if (pri) { curr++; + kevq->kevq_tot_realtime++; } if (nkev == KQ_NEVENTS) { @@ -3395,7 +3410,7 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data, switch KQTUNE_PARSE_OBJ(tune) { case KQTUNE_RTSHARE: tune = KQTUNE_PARSE_ARGS(tune); - if (tune >= 0 && tune <= 100) + if (tune > 0 && tune <= 100) kq->kq_rtshare = tune; else error = (EINVAL); @@ -4530,14 +4545,18 @@ knote_next_kevq(struct knote *kn) if (sargs > 0) { KVLST_RLOCK(kq); other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs); - KVLST_RUNLOCK(kq); if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(next_kevq, other_kevq, 90) > 0)) { next_kevq = other_kevq; } } - next_kevq = kevq_lock_check_avail(next_kevq); + next_kevq = kevq_lock_check_avail(next_kevq); + + /* need to unlock after kevq lock acquire because other_kevq might be drained too */ + if (sargs > 0) { + KVLST_RUNLOCK(kq); + } KQD_RUNLOCK(kqd); if (kqd_mismatch && next_kevq != NULL) { @@ -4583,8 +4602,10 @@ knote_next_kevq(struct knote *kn) CTR2(KTR_KQ, "knote_next_kevq: [RAND] next kevq %p for kn %p", next_kevq, kn); } - if (next_kevq != NULL) + if (next_kevq != NULL) { KEVQ_OWNED(next_kevq); + next_kevq->kevq_tot_sched++; + } return next_kevq; } diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h index 91a817e74766..ae506f5d8cde 100644 --- a/sys/sys/eventvar.h +++ b/sys/sys/eventvar.h @@ -49,6 +49,31 @@ #define KQDIR_INACTIVE (1) struct kevq { + /* 1st cacheline */ + /* Sched stats */ + 
uint64_t kevq_avg_lat; + uint64_t kevq_avg_ev; + uint64_t kevq_tot_ev; + uint64_t kevq_tot_time; + uint64_t kevq_tot_syscall; + uint64_t kevq_last_kev; + uint32_t kevq_last_nkev; +#define KEVQ_SLEEP 0x01 +#define KEVQ_CLOSING 0x02 +#define KEVQ_ACTIVE 0x04 +#define KEVQ_WS 0x08 /* the kevq is work stealing */ + int kevq_state; + int kn_count; /* number of pending knotes */ + int kn_rt_count; /* number of runtime knotes */ + + /* 2nd cacheline */ + uint64_t kevq_tot_ws; + /* TODO: maybe these should be in kqdomain or global */ + uint64_t kevq_tot_fallback; + uint64_t kevq_tot_kqd_mismatch; + uint64_t kevq_tot_sched; + uint64_t kevq_tot_realtime; + LIST_ENTRY(kevq) kevq_th_e; /* entry into kevq_thred's hashtable */ LIST_ENTRY(kevq) kq_e; /* entry into kq */ LIST_ENTRY(kevq) kevq_th_tqe; /* entry into kevq_thred's kevq_list */ @@ -61,29 +86,7 @@ struct kevq { struct knote kn_marker; struct ktailq kn_rt_head; /* list of pending knotes with runtime priority */ struct knote kn_marker_rt; - int kn_count; /* number of pending knotes */ - int kn_rt_count; /* number of runtime knotes */ - -#define KEVQ_SLEEP 0x01 -#define KEVQ_CLOSING 0x02 -#define KEVQ_ACTIVE 0x04 -#define KEVQ_WS 0x08 /* the kevq is work stealing */ - int kevq_state; int kevq_refcnt; - - uint64_t kevq_last_kev; - uint64_t kevq_last_nkev; - /* Sched stats */ - uint64_t kevq_avg_lat; - uint64_t kevq_avg_ev; - uint64_t kevq_tot_ev; - uint64_t kevq_tot_time; - uint64_t kevq_tot_syscall; - uint64_t kevq_tot_ws; - - /* TODO: maybe these should be in kqdomain or global */ - uint64_t kevq_tot_fallback; - uint64_t kevq_tot_kqd_mismatch; }; /* TODO: assumed that threads don't get rescheduled across cores */ diff --git a/tests/sys/kqueue/libkqueue/read_m.c b/tests/sys/kqueue/libkqueue/read_m.c index cc0a5cb1e0a4..b4a78d6afe21 100644 --- a/tests/sys/kqueue/libkqueue/read_m.c +++ b/tests/sys/kqueue/libkqueue/read_m.c @@ -325,12 +325,14 @@ test_socket_queue(void) tid++; group[i][j].evcnt = 0; group[i][j].group_id = i; - 
pthread_create(&group[i][j].thrd, NULL, test_socket_queue_thrd, &group[i][j]); + pthread_attr_t attr; + pthread_attr_init(&attr); CPU_ZERO(&cpuset); CPU_SET(i, &cpuset); - if (pthread_setaffinity_np(group[i][j].thrd, sizeof(cpuset_t), &cpuset) < 0) { + if (pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &cpuset) < 0) { err(1, "thread_affinity"); } + pthread_create(&group[i][j].thrd, &attr, test_socket_queue_thrd, &group[i][j]); #ifdef TEST_DEBUG printf("READ_M: created and affinitized thread %d to core group %d\n", group[i][j].tid, i); #endif @@ -636,10 +638,10 @@ test_socket_ws_timeout() /*************************** * Brutal test ***************************/ -#define THREAD_BRUTE_CNT (16) -#define SOCK_BRUTE_CNT (128) -#define PACKET_BRUTE_CNT (50 * (SOCK_BRUTE_CNT)) -#define THREAD_EXIT_PROB (67) +#define THREAD_BRUTE_CNT (8) +#define SOCK_BRUTE_CNT (512) +#define PACKET_BRUTE_CNT (100 * (SOCK_BRUTE_CNT)) +#define THREAD_EXIT_PROB (50) #define BRUTE_REALTIME_PROB (50) #define BRUTE_MAX_FREQ (10000) #define BRUTE_MIN_FREQ (1) @@ -776,7 +778,7 @@ test_socket_brutal(char* name) int error; int val; - val = KQTUNE_MAKE(KQTUNE_RTSHARE, rand() % 100); + val = KQTUNE_MAKE(KQTUNE_RTSHARE, (rand() % 100) + 1); error = ioctl(g_kqfd, FKQTUNE, &val); if (error == -1) { err(1, "ioctl TUNE"); @@ -894,7 +896,7 @@ test_socket_realtime() socket_push(socks[i][1], '.'); } - for (int i = 0; i <= 100; i++) { + for (int i = 1; i <= 100; i++) { test_socket_rt_share(kqfd, 4, i); } @@ -933,10 +935,32 @@ test_evfilt_read_m() if (error == -1) { err(1, "ioctl"); } - test_socket_queue(); + //test_socket_queue(); test_socket_brutal("queue0"); close(g_kqfd); + /* Queue + bo2*/ + flags = KQSCHED_MAKE(KQ_SCHED_QUEUE,2, 0, 0); + g_kqfd = kqueue(); + error = ioctl(g_kqfd, FKQMULTI, &flags); + if (error == -1) { + err(1, "ioctl"); + } + //test_socket_queue(); + test_socket_brutal("queue2"); + close(g_kqfd); + + /* Queue + bo2 + ws */ + flags = KQSCHED_MAKE(KQ_SCHED_QUEUE,2, KQ_SCHED_FEAT_WS, 
1); + g_kqfd = kqueue(); + error = ioctl(g_kqfd, FKQMULTI, &flags); + if (error == -1) { + err(1, "ioctl"); + } + //test_socket_queue(); + test_socket_brutal("queue2_ws"); + close(g_kqfd); + /* CPU + Bo0 */ flags = KQSCHED_MAKE(KQ_SCHED_CPU,0,0,0);; g_kqfd = kqueue(); @@ -947,16 +971,6 @@ test_evfilt_read_m() test_socket_brutal("cpu0"); close(g_kqfd); - /* CPU + Bo1 */ - flags = KQSCHED_MAKE(KQ_SCHED_CPU,1,0,0);; - g_kqfd = kqueue(); - error = ioctl(g_kqfd, FKQMULTI, &flags); - if (error == -1) { - err(1, "ioctl"); - } - test_socket_brutal("cpu1"); - close(g_kqfd); - /* CPU + Bo2 */ flags = KQSCHED_MAKE(KQ_SCHED_CPU,2,0,0); g_kqfd = kqueue();