Fix infinite loop when rtshare=0; fix a crash in Queue/CPU + best2; add stat counters for sched and realtime priority

This commit is contained in:
Charlie Root 2019-09-15 04:57:34 -04:00
parent 638e096379
commit 7f31542099
3 changed files with 92 additions and 54 deletions

View File

@ -451,6 +451,7 @@ kevq_avail(struct kevq *kevq)
static inline struct kevq *
kevq_lock_check_avail(struct kevq *next_kevq)
{
CTR1(KTR_KQ, "kevq_lock_check_avail: kevq %p", next_kevq);
if (next_kevq != NULL) {
KEVQ_NOTOWNED(next_kevq);
KEVQ_LOCK(next_kevq);
@ -2420,7 +2421,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
"avg_events=\"%ld\" "
"total_fallbacks=\"%ld\" "
"total_mismatches=\"%ld\" "
"total_worksteal=\"%ld\" />\n",
"total_worksteal=\"%ld\" "
"total_realtime=\"%ld\" "
"total_sched=\"%ld\" />\n",
level * DUMP_INDENT, ' ', kevq, kevq->kn_count, kevq->kn_rt_count,
kevq->kevq_tot_time,
kevq->kevq_tot_syscall,
@ -2429,7 +2432,9 @@ kevq_dump(struct sbuf *buf, struct kevq *kevq, int level)
kevq->kevq_avg_ev,
kevq->kevq_tot_fallback,
kevq->kevq_tot_kqd_mismatch,
kevq->kevq_tot_ws);
kevq->kevq_tot_ws,
kevq->kevq_tot_realtime,
kevq->kevq_tot_sched);
}
static void
@ -2739,7 +2744,7 @@ kevq_worksteal(struct kevq *kevq)
tgt_count = KQSCHED_GET_FARGS(kq);
/* XXX: hack */
KASSERT(tgt_count < 8, ("too many kevq ws knotes"));
KASSERT(tgt_count <= 8, ("too many kevq ws knotes"));
KVLST_RLOCK(kq);
other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, 1);
@ -2765,7 +2770,6 @@ kevq_worksteal(struct kevq *kevq)
/* steal from the first because it arrived late */
ws_kn = kevq_peek_knote(other_kevq);
/* TODO: maybe limit the number of knotes to go through */
while((ws_count < tgt_count) && (ws_kn != NULL)) {
/* fast fail */
@ -2918,8 +2922,16 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
/* adjust rtlimit according to the target share:
 * rtlimit = ceil(maxevents * kq->kq_rtshare / 100)
 */
/* XXX: mathematically rtlimit can be 0, but we don't allow it (yet? forever?).
 * The current implementation has an issue when only realtime events are pending and rtlimit = 0:
 * kevq_total_knote() returns > 0, but because rtlimit = 0 we never dequeue any realtime event,
 * so the function is trapped in an infinite loop (wakeup because tot_ev > 0 -> dequeue normal
 * marker -> count = 0 -> retry -> wakeup because tot_ev > 0).
 * We simply forbid users from setting rtshare to 0 so we always hand back at least one rt event.
 * Otherwise the fix would be very complicated, because it involves sleep-waiting on different
 * queues as rtshare changes, and at runtime too. Not worth it really.
 */
rtlimit = (maxevents * kq->kq_rtshare + 99) / 100;
KASSERT(rtlimit >= 0, ("the math above is fundamentally broken"));
KASSERT(rtlimit > 0, ("the math above is fundamentally broken"));
rsbt = 0;
if (tsp != NULL) {
@ -2976,7 +2988,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
KEVQ_OWNED(kevq);
kevp = keva;
CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events", td->td_tid, kevq, kevq_total_knote(kevq));
CTR4(KTR_KQ, "kqueue_scan: td %d on kevq %p has %d events, max_ev %d", td->td_tid, kevq, kevq_total_knote(kevq), maxevents);
if (kevq_total_knote(kevq) == 0) {
if (fsbt == -1) {
@ -3048,7 +3060,9 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
}
KEVQ_OWNED(kevq);
// if (kevq_total_knote(kevq) > 0) {
// KASSERT(!(TAILQ_FIRST(&kevq->kn_rt_head) == NULL && TAILQ_FIRST(&kevq->kn_head) == NULL), ("NULL > 0?"));
// }
/* quick check */
if (curr < rtlimit) {
rdrained = 0;
@ -3105,7 +3119,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
/* Now we have exclusive access to kn */
TAILQ_REMOVE(kntq, kn, kn_tqe);
CTR3(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p", td->td_tid, kevq, kn);
CTR4(KTR_KQ, "kqueue_scan: td %d on kevq %p dequeued knote %p, curr %d", td->td_tid, kevq, kn, curr);
if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
kn->kn_status &= ~(KN_QUEUED | KN_PROCESSING | KN_WS);
*kncnt -= 1;
@ -3117,7 +3131,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
/* We are dequeuing our marker, wakeup threads waiting on it */
knote_flux_wakeup(kn);
KN_FLUX_UNLOCK(kn);
CTR2(KTR_KQ, "kqueue_scan: td %d MARKER WAKEUP %p", td->td_tid, kn);
CTR3(KTR_KQ, "kqueue_scan: td %d MARKER WAKEUP %p PRI %d", td->td_tid, kn, pri);
if (kn == rtmarker) {
rdrained = 1;
@ -3247,6 +3261,7 @@ kqueue_scan(struct kevq *kevq, int maxevents, struct kevent_copyops *k_ops,
if (pri) {
curr++;
kevq->kevq_tot_realtime++;
}
if (nkev == KQ_NEVENTS) {
@ -3395,7 +3410,7 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data,
switch KQTUNE_PARSE_OBJ(tune) {
case KQTUNE_RTSHARE:
tune = KQTUNE_PARSE_ARGS(tune);
if (tune >= 0 && tune <= 100)
if (tune > 0 && tune <= 100)
kq->kq_rtshare = tune;
else
error = (EINVAL);
@ -4530,14 +4545,18 @@ knote_next_kevq(struct knote *kn)
if (sargs > 0) {
KVLST_RLOCK(kq);
other_kevq = kevq_vec_select_kevq(&kq->kevq_vlist, sargs);
KVLST_RUNLOCK(kq);
if (next_kevq == NULL || (other_kevq != NULL && kevq_lat_wcmp(next_kevq, other_kevq, 90) > 0)) {
next_kevq = other_kevq;
}
}
next_kevq = kevq_lock_check_avail(next_kevq);
next_kevq = kevq_lock_check_avail(next_kevq);
/* KVLST must remain locked until after the kevq lock is acquired, because other_kevq might be drained in the meantime */
if (sargs > 0) {
KVLST_RUNLOCK(kq);
}
KQD_RUNLOCK(kqd);
if (kqd_mismatch && next_kevq != NULL) {
@ -4583,8 +4602,10 @@ knote_next_kevq(struct knote *kn)
CTR2(KTR_KQ, "knote_next_kevq: [RAND] next kevq %p for kn %p", next_kevq, kn);
}
if (next_kevq != NULL)
if (next_kevq != NULL) {
KEVQ_OWNED(next_kevq);
next_kevq->kevq_tot_sched++;
}
return next_kevq;
}

View File

@ -49,6 +49,31 @@
#define KQDIR_INACTIVE (1)
struct kevq {
/* 1st cacheline */
/* Sched stats */
uint64_t kevq_avg_lat;
uint64_t kevq_avg_ev;
uint64_t kevq_tot_ev;
uint64_t kevq_tot_time;
uint64_t kevq_tot_syscall;
uint64_t kevq_last_kev;
uint32_t kevq_last_nkev;
#define KEVQ_SLEEP 0x01
#define KEVQ_CLOSING 0x02
#define KEVQ_ACTIVE 0x04
#define KEVQ_WS 0x08 /* the kevq is work stealing */
int kevq_state;
int kn_count; /* number of pending knotes */
int kn_rt_count; /* number of runtime knotes */
/* 2nd cacheline */
uint64_t kevq_tot_ws;
/* TODO: maybe these should be in kqdomain or global */
uint64_t kevq_tot_fallback;
uint64_t kevq_tot_kqd_mismatch;
uint64_t kevq_tot_sched;
uint64_t kevq_tot_realtime;
LIST_ENTRY(kevq) kevq_th_e; /* entry into kevq_thred's hashtable */
LIST_ENTRY(kevq) kq_e; /* entry into kq */
LIST_ENTRY(kevq) kevq_th_tqe; /* entry into kevq_thred's kevq_list */
@ -61,29 +86,7 @@ struct kevq {
struct knote kn_marker;
struct ktailq kn_rt_head; /* list of pending knotes with runtime priority */
struct knote kn_marker_rt;
int kn_count; /* number of pending knotes */
int kn_rt_count; /* number of runtime knotes */
#define KEVQ_SLEEP 0x01
#define KEVQ_CLOSING 0x02
#define KEVQ_ACTIVE 0x04
#define KEVQ_WS 0x08 /* the kevq is work stealing */
int kevq_state;
int kevq_refcnt;
uint64_t kevq_last_kev;
uint64_t kevq_last_nkev;
/* Sched stats */
uint64_t kevq_avg_lat;
uint64_t kevq_avg_ev;
uint64_t kevq_tot_ev;
uint64_t kevq_tot_time;
uint64_t kevq_tot_syscall;
uint64_t kevq_tot_ws;
/* TODO: maybe these should be in kqdomain or global */
uint64_t kevq_tot_fallback;
uint64_t kevq_tot_kqd_mismatch;
};
/* TODO: assumes that threads don't get rescheduled across cores */

View File

@ -325,12 +325,14 @@ test_socket_queue(void)
tid++;
group[i][j].evcnt = 0;
group[i][j].group_id = i;
pthread_create(&group[i][j].thrd, NULL, test_socket_queue_thrd, &group[i][j]);
pthread_attr_t attr;
pthread_attr_init(&attr);
CPU_ZERO(&cpuset);
CPU_SET(i, &cpuset);
if (pthread_setaffinity_np(group[i][j].thrd, sizeof(cpuset_t), &cpuset) < 0) {
if (pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &cpuset) < 0) {
err(1, "thread_affinity");
}
pthread_create(&group[i][j].thrd, &attr, test_socket_queue_thrd, &group[i][j]);
#ifdef TEST_DEBUG
printf("READ_M: created and affinitized thread %d to core group %d\n", group[i][j].tid, i);
#endif
@ -636,10 +638,10 @@ test_socket_ws_timeout()
/***************************
* Brutal test
***************************/
#define THREAD_BRUTE_CNT (16)
#define SOCK_BRUTE_CNT (128)
#define PACKET_BRUTE_CNT (50 * (SOCK_BRUTE_CNT))
#define THREAD_EXIT_PROB (67)
#define THREAD_BRUTE_CNT (8)
#define SOCK_BRUTE_CNT (512)
#define PACKET_BRUTE_CNT (100 * (SOCK_BRUTE_CNT))
#define THREAD_EXIT_PROB (50)
#define BRUTE_REALTIME_PROB (50)
#define BRUTE_MAX_FREQ (10000)
#define BRUTE_MIN_FREQ (1)
@ -776,7 +778,7 @@ test_socket_brutal(char* name)
int error;
int val;
val = KQTUNE_MAKE(KQTUNE_RTSHARE, rand() % 100);
val = KQTUNE_MAKE(KQTUNE_RTSHARE, (rand() % 100) + 1);
error = ioctl(g_kqfd, FKQTUNE, &val);
if (error == -1) {
err(1, "ioctl TUNE");
@ -894,7 +896,7 @@ test_socket_realtime()
socket_push(socks[i][1], '.');
}
for (int i = 0; i <= 100; i++) {
for (int i = 1; i <= 100; i++) {
test_socket_rt_share(kqfd, 4, i);
}
@ -933,10 +935,32 @@ test_evfilt_read_m()
if (error == -1) {
err(1, "ioctl");
}
test_socket_queue();
//test_socket_queue();
test_socket_brutal("queue0");
close(g_kqfd);
/* Queue + bo2*/
flags = KQSCHED_MAKE(KQ_SCHED_QUEUE,2, 0, 0);
g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) {
err(1, "ioctl");
}
//test_socket_queue();
test_socket_brutal("queue2");
close(g_kqfd);
/* Queue + bo2 + ws */
flags = KQSCHED_MAKE(KQ_SCHED_QUEUE,2, KQ_SCHED_FEAT_WS, 1);
g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) {
err(1, "ioctl");
}
//test_socket_queue();
test_socket_brutal("queue2_ws");
close(g_kqfd);
/* CPU + Bo0 */
flags = KQSCHED_MAKE(KQ_SCHED_CPU,0,0,0);;
g_kqfd = kqueue();
@ -947,16 +971,6 @@ test_evfilt_read_m()
test_socket_brutal("cpu0");
close(g_kqfd);
/* CPU + Bo1 */
flags = KQSCHED_MAKE(KQ_SCHED_CPU,1,0,0);;
g_kqfd = kqueue();
error = ioctl(g_kqfd, FKQMULTI, &flags);
if (error == -1) {
err(1, "ioctl");
}
test_socket_brutal("cpu1");
close(g_kqfd);
/* CPU + Bo2 */
flags = KQSCHED_MAKE(KQ_SCHED_CPU,2,0,0);
g_kqfd = kqueue();