From 1d7b9ed2e6830e5d28f73f75e75aa235d40a65f4 Mon Sep 17 00:00:00 2001
From: Julian Elischer <julian@FreeBSD.org>
Date: Mon, 29 Jul 2002 18:33:32 +0000
Subject: [PATCH] Create a new thread state to describe threads that would be
 ready to run except for the fact that they are presently swapped out. Also add
 a process flag to indicate that the process has started the struggle to swap
 back in. This will be needed for the case where multiple threads start the
 swapin action to stop a collision. Also add code to stop a process from being
 swapped out if one of the threads in this process is actually off running on
 another CPU.. that might hurt...

Submitted by:	Seigo Tanimura <tanimura@r.dl.itc.u-tokyo.ac.jp>
---
 sys/fs/procfs/procfs_ctl.c |  2 +
 sys/kern/kern_condvar.c    |  1 +
 sys/kern/kern_synch.c      |  7 ++--
 sys/sys/proc.h             |  9 ++++-
 sys/vm/vm_glue.c           | 82 ++++++++++++++++++++++++++++++--------
 5 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
index 15ed7189d6ff..88ce6ddee384 100644
--- a/sys/fs/procfs/procfs_ctl.c
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -144,7 +144,9 @@ procfs_control(struct thread *td, struct proc *p, int op)
 		 * Stop the target.
 		 */
 		p->p_flag |= P_TRACED;
+		mtx_lock_spin(&sched_lock);
 		faultin(p);
+		mtx_unlock_spin(&sched_lock);
 		p->p_xstat = 0;		/* XXX ? */
 		if (p->p_pptr != td->td_proc) {
 			p->p_oppid = p->p_pptr->p_pid;
diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index e2bbbb4dcb4e..822a4c9ca62b 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -533,6 +533,7 @@ cv_wakeup(struct cv *cvp)
 			setrunqueue(td);
 			maybe_resched(td);
 		} else {
+			td->td_state = TDS_SWAPPED;
 			td->td_proc->p_sflag |= PS_SWAPINREQ;
 			wakeup(&proc0); /* XXXKSE */
 		}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index d55384256619..e4bef85d7d17 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -718,7 +718,7 @@ restart:
 					setrunqueue(td);
 					maybe_resched(td);
 				} else {
-/* XXXKSE Wrong! */			td->td_state = TDS_RUNQ;
+					td->td_state = TDS_SWAPPED;
 					p->p_sflag |= PS_SWAPINREQ;
 					wakeup(&proc0);
 				}
@@ -765,7 +765,7 @@ restart:
 					maybe_resched(td);
 					break;
 				} else {
-/* XXXKSE Wrong */			td->td_state = TDS_RUNQ;
+					td->td_state = TDS_SWAPPED;
 					p->p_sflag |= PS_SWAPINREQ;
 					wakeup(&proc0);
 				}
@@ -919,6 +919,7 @@ setrunnable(struct thread *td)
 	case 0:
 	case TDS_RUNNING:
 	case TDS_IWAIT:
+	case TDS_SWAPPED:
 	default:
 		printf("state is %d", td->td_state);
 		panic("setrunnable(2)");
@@ -939,7 +940,7 @@ setrunnable(struct thread *td)
 		updatepri(td);
 	td->td_ksegrp->kg_slptime = 0;
 	if ((p->p_sflag & PS_INMEM) == 0) {
-		td->td_state = TDS_RUNQ; /* XXXKSE not a good idea */
+		td->td_state = TDS_SWAPPED;
 		p->p_sflag |= PS_SWAPINREQ;
 		wakeup(&proc0);
 	} else {
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index f3dd3321e908..af931ab24263 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -306,7 +306,8 @@ struct thread {
 		TDS_RUNNING,
 		TDS_SUSPENDED,		/* would have liked to have run */
 		TDS_IWAIT,
-		TDS_SURPLUS
+		TDS_SURPLUS,
+		TDS_SWAPPED
 	} td_state;
 	struct callout	td_slpcallout;	/* (h) Callout for sleep. */
 	struct trapframe *td_frame;	/* (k) */
@@ -587,6 +588,7 @@ struct proc {
 #define	PS_SWAPINREQ	0x00100	/* Swapin request due to wakeup. */
 #define	PS_SWAPPING	0x00200	/* Process is being swapped. */
 #define	PS_NEEDSIGCHK	0x02000	/* Process may need signal delivery. */
+#define	PS_SWAPPINGIN	0x04000	/* Swapin in progress. */
 
 /* used only in legacy conversion code */
 #define SIDL	1		/* Process being created by fork. */
@@ -711,8 +713,11 @@ sigonstack(size_t sp)
 } while (0)
 #define	_PHOLD(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
-	if ((p)->p_lock++ == 0)						\
+	if ((p)->p_lock++ == 0) {					\
+		mtx_lock_spin(&sched_lock);				\
 		faultin((p));						\
+		mtx_unlock_spin(&sched_lock);				\
+	}								\
 } while (0)
 
 #define	PRELE(p) do {							\
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index ca1d582b9e30..2b4bbea278ea 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -480,7 +480,7 @@ faultin(p)
 
 	GIANT_REQUIRED;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	mtx_lock_spin(&sched_lock);
+	mtx_assert(&sched_lock, MA_OWNED);
 #ifdef NO_SWAPPING
 	if ((p->p_sflag & PS_INMEM) == 0)
 		panic("faultin: proc swapped out with NO_SWAPPING!");
@@ -489,6 +489,19 @@ faultin(p)
 		struct thread *td;
 
 		++p->p_lock;
+		/*
+		 * If another process is swapping in this process,
+		 * just wait until it finishes.
+		 */
+		if (p->p_sflag & PS_SWAPPINGIN) {
+			mtx_unlock_spin(&sched_lock);
+			msleep(&p->p_sflag, &p->p_mtx, PVM, "faultin", 0);
+			mtx_lock_spin(&sched_lock);
+			--p->p_lock;
+			return;
+		}
+
+		p->p_sflag |= PS_SWAPPINGIN;
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 
@@ -499,19 +512,17 @@ faultin(p)
 		PROC_LOCK(p);
 		mtx_lock_spin(&sched_lock);
 		FOREACH_THREAD_IN_PROC (p, td)
-			if (td->td_state == TDS_RUNQ) {	/* XXXKSE */
-				/* XXXKSE TDS_RUNQ causes assertion failure. */
-				td->td_state = TDS_UNQUEUED;
+			if (td->td_state == TDS_SWAPPED)	/* XXXKSE */
 				setrunqueue(td);
-			}
 
+		p->p_sflag &= ~PS_SWAPPINGIN;
 		p->p_sflag |= PS_INMEM;
+		wakeup(&p->p_sflag);
 
 		/* undo the effect of setting SLOCK above */
 		--p->p_lock;
 	}
 #endif
-	mtx_unlock_spin(&sched_lock);
 }
 
 /*
@@ -548,13 +559,16 @@ loop:
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		struct ksegrp *kg;
-		if (p->p_sflag & (PS_INMEM | PS_SWAPPING)) {
+		if (p->p_sflag & (PS_INMEM | PS_SWAPPING | PS_SWAPPINGIN)) {
 			continue;
 		}
 		mtx_lock_spin(&sched_lock);
 		FOREACH_THREAD_IN_PROC(p, td) {
-			/* Only consider runnable threads */
-			if (td->td_state == TDS_RUNQ) {
+			/*
+			 * A runnable thread of a process swapped out is in
+			 * TDS_SWAPPED.
+			 */
+			if (td->td_state == TDS_SWAPPED) {
 				kg = td->td_ksegrp;
 				pri = p->p_swtime + kg->kg_slptime;
 				if ((p->p_sflag & PS_SWAPINREQ) == 0) {
@@ -584,18 +598,28 @@ loop:
 		tsleep(&proc0, PVM, "sched", maxslp * hz / 2);
 		goto loop;
 	}
+	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
+
+	/*
+	 * Another process may be bringing or may have already
+	 * brought this process in while we traverse all threads.
+	 * Or, this process may even be being swapped out again.
+	 */
+	if (p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) {
+		mtx_unlock_spin(&sched_lock);
+		PROC_UNLOCK(p);
+		goto loop;
+	}
+
 	p->p_sflag &= ~PS_SWAPINREQ;
-	mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * We would like to bring someone in. (only if there is space).
 	 * [What checks the space? ]
 	 */
-	PROC_LOCK(p);
 	faultin(p);
 	PROC_UNLOCK(p);
-	mtx_lock_spin(&sched_lock);
 	p->p_swtime = 0;
 	mtx_unlock_spin(&sched_lock);
 	goto loop;
@@ -660,7 +684,7 @@ retry:
 		 */
 		vm = p->p_vmspace;
 		mtx_lock_spin(&sched_lock);
-		if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) {
+		if ((p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) != PS_INMEM) {
 			mtx_unlock_spin(&sched_lock);
 			PROC_UNLOCK(p);
 			continue;
@@ -697,8 +721,18 @@ retry:
 					PROC_UNLOCK(p);
 					goto nextproc;
 				}
+				/*
+				 * Do not swapout a process if there is
+				 * a thread whose pageable memory may
+				 * be accessed.
+				 *
+				 * This could be refined to support
+				 * swapping out a thread.
+				 */
 				FOREACH_THREAD_IN_PROC(p, td) {
-					if ((td->td_priority) < PSOCK) {
+					if ((td->td_priority) < PSOCK ||
+					    !(td->td_state == TDS_SLP ||
+					     td->td_state == TDS_RUNQ)) {
 						mtx_unlock_spin(&sched_lock);
 						PROC_UNLOCK(p);
 						goto nextproc;
@@ -773,19 +807,35 @@ swapout(p)
 #if defined(SWAP_DEBUG)
 	printf("swapping out %d\n", p->p_pid);
 #endif
+	mtx_lock_spin(&sched_lock);
+
+	/*
+	 * Make sure that all threads are safe to be swapped out.
+	 *
+	 * Alternatively, we could swap out only safe threads.
+	 */
+	FOREACH_THREAD_IN_PROC(p, td) {
+		if (!(td->td_state == TDS_SLP ||
+		     td->td_state == TDS_RUNQ)) {
+			mtx_unlock_spin(&sched_lock);
+			return;
+		}
+	}
+
 	++p->p_stats->p_ru.ru_nswap;
 	/*
 	 * remember the process resident count
 	 */
 	p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);
 
-	mtx_lock_spin(&sched_lock);
 	p->p_sflag &= ~PS_INMEM;
 	p->p_sflag |= PS_SWAPPING;
 	PROC_UNLOCK(p);
 	FOREACH_THREAD_IN_PROC (p, td)
-		if (td->td_state == TDS_RUNQ)	/* XXXKSE */
+		if (td->td_state == TDS_RUNQ) {	/* XXXKSE */
 			remrunqueue(td);	/* XXXKSE */
+			td->td_state = TDS_SWAPPED;
+		}
 	mtx_unlock_spin(&sched_lock);
 
 	vm_proc_swapout(p);