Allocate TIDs in thread_init() and deallocate them in thread_fini().

The overhead of unconditionally allocating TIDs (and likewise,
unconditionally deallocating them) is amortized across multiple
thread creations by the type-stable storage that UMA provides.
Previously the cost was kept down by having threads created as part
of a fork operation use the process' PID as the TID. While this had
some nice properties, it also complicated the way TIDs were
allocated, and with the type-stable storage that UMA gives us the
optimization is unnecessary anyway.
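
For context, here is a minimal sketch of the UMA pattern this relies
on (the zone, the obj type, and the alloc_id()/free_id() helpers are
illustrative placeholders, not kernel API): uma_zcreate() takes
separate ctor/dtor and init/fini hooks. The ctor and dtor run on
every uma_zalloc() and uma_zfree(), while init and fini run only when
an item enters or leaves the zone's backing store, so work moved into
init/fini is paid once per physical item and amortized over every
reuse of that type-stable storage.

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <vm/uma.h>

	struct obj {
		int	id;		/* expensive resource, e.g. a TID */
	};

	static uma_zone_t obj_zone;

	static void
	obj_init(void *mem, int size)
	{
		struct obj *o = mem;

		/* Runs once, when the item is first backed by memory. */
		o->id = alloc_id();	/* hypothetical ID allocator */
	}

	static void
	obj_fini(void *mem, int size)
	{
		struct obj *o = mem;

		/* Runs once, when the backing memory is reclaimed. */
		free_id(o->id);		/* hypothetical */
	}

	static void
	obj_setup(void *dummy)
	{
		/* No ctor/dtor: nothing needs doing on every alloc/free. */
		obj_zone = uma_zcreate("obj", sizeof(struct obj),
		    NULL, NULL, obj_init, obj_fini, UMA_ALIGN_CACHE, 0);
	}

This mirrors what the commit does with the thread zone: TID handling
moves out of thread_ctor()/thread_dtor() (per use) and into
thread_init()/thread_fini() (per item).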

This change affects how core dumps are created and in particular how
the PRSTATUS notes are dumped. Since we no longer have a thread whose
TID equals the PID, we need a different way to preserve the previous
behavior. We do this by having the given thread (i.e. the thread
passed to the core dump code in td) dump its state first and fill in
pr_pid with the actual PID. All other threads have pr_pid set to
their TIDs. The upshot of all this is that the debugger will now
likely select the right LWP (= TID) as the initial thread.
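
In loop form, the emission order described above reduces to the
sketch below (emit_prstatus() is a hypothetical stand-in for the
putnote calls visible in the diff):

	thr = td;			/* dumping thread first, pr_pid = PID */
	while (thr != NULL) {
		emit_prstatus(thr, (thr == td) ? p->p_pid : thr->td_tid);
		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)		/* already emitted; skip it */
			thr = TAILQ_NEXT(thr, td_plist);
	}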

Credits to: julian@ for spotting how we can utilize UMA.
Thanks to: all who provided julian@ with test results.
Commit 247aba2474 by Marcel Moolenaar, 2004-06-26 18:58:22 +00:00 (parent 3f9d1ef905).
6 changed files with 89 additions and 110 deletions.

sys/kern/imgact_elf.c

@@ -910,10 +910,10 @@ struct sseg_closure {
static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct proc *, segment_callback, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
int, void *, size_t);
static void __elfN(puthdr)(struct proc *, void *, size_t *, int);
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
const void *, size_t);
@@ -922,11 +922,10 @@ extern int osreldate;
int
__elfN(coredump)(td, vp, limit)
struct thread *td;
register struct vnode *vp;
struct vnode *vp;
off_t limit;
{
register struct proc *p = td->td_proc;
register struct ucred *cred = td->td_ucred;
struct ucred *cred = td->td_ucred;
int error = 0;
struct sseg_closure seginfo;
void *hdr;
@@ -935,7 +934,7 @@ __elfN(coredump)(td, vp, limit)
/* Size the program segments. */
seginfo.count = 0;
seginfo.size = 0;
each_writable_segment(p, cb_size_segment, &seginfo);
each_writable_segment(td, cb_size_segment, &seginfo);
/*
* Calculate the size of the core file header area by making
@@ -943,7 +942,7 @@ __elfN(coredump)(td, vp, limit)
* size is calculated.
*/
hdrsize = 0;
__elfN(puthdr)(p, (void *)NULL, &hdrsize, seginfo.count);
__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
if (hdrsize + seginfo.size >= limit)
return (EFAULT);
@@ -1036,11 +1035,12 @@ cb_size_segment(entry, closure)
* caller-supplied data.
*/
static void
each_writable_segment(p, func, closure)
struct proc *p;
each_writable_segment(td, func, closure)
struct thread *td;
segment_callback func;
void *closure;
{
struct proc *p = td->td_proc;
vm_map_t map = &p->p_vmspace->vm_map;
vm_map_entry_t entry;
@@ -1103,13 +1103,12 @@ __elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
size_t hdrsize;
void *hdr;
{
struct proc *p = td->td_proc;
size_t off;
/* Fill in the header. */
bzero(hdr, hdrsize);
off = 0;
__elfN(puthdr)(p, hdr, &off, numsegs);
__elfN(puthdr)(td, hdr, &off, numsegs);
/* Write it to the core file. */
return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
@@ -1118,7 +1117,7 @@ __elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
}
static void
__elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
struct {
prstatus_t status;
@@ -1128,9 +1127,12 @@ __elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
prstatus_t *status;
prfpregset_t *fpregset;
prpsinfo_t *psinfo;
struct thread *first, *thr;
struct proc *p;
struct thread *thr;
size_t ehoff, noteoff, notesz, phoff;
p = td->td_proc;
ehoff = *off;
*off += sizeof(Elf_Ehdr);
@@ -1169,22 +1171,16 @@ __elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
sizeof *psinfo);
/*
* We want to start with the registers of the initial thread in the
* process so that the .reg and .reg2 pseudo-sections created by bfd
* will be identical to the .reg/$PID and .reg2/$PID pseudo-sections.
* This makes sure that any tool that only looks for .reg and .reg2
* and not for .reg/$PID and .reg2/$PID will behave the same as
* before. The first thread is the thread with an ID equal to the
* process' ID.
* Note that the initial thread may already be gone. In that case
* 'first' is NULL.
* For backward compatibility, we dump the registers of the current
* thread (as passed to us in td) first and set pr_pid to the PID of
* the process. We then dump the other threads, but with pr_pid set
* to the TID of the thread itself. This has two advantages:
* 1) We preserve the meaning of pr_pid for as much as is possible.
* 2) The debugger will select the current thread as its initial
* "thread", which is likely what we want.
*/
thr = first = TAILQ_FIRST(&p->p_threads);
while (first != NULL && first->td_tid > PID_MAX)
first = TAILQ_NEXT(first, td_plist);
if (first != NULL)
thr = first;
do {
thr = td;
while (thr != NULL) {
if (dst != NULL) {
status->pr_version = PRSTATUS_VERSION;
status->pr_statussz = sizeof(prstatus_t);
@@ -1192,7 +1188,7 @@ __elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
status->pr_fpregsetsz = sizeof(fpregset_t);
status->pr_osreldate = osreldate;
status->pr_cursig = p->p_sig;
status->pr_pid = thr->td_tid;
status->pr_pid = (thr == td) ? p->p_pid : thr->td_tid;
fill_regs(thr, &status->pr_reg);
fill_fpregs(thr, fpregset);
}
@@ -1200,12 +1196,14 @@ __elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
sizeof *status);
__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
sizeof *fpregset);
/* XXX allow for MD specific notes. */
thr = (thr == first) ? TAILQ_FIRST(&p->p_threads) :
thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
TAILQ_NEXT(thr, td_plist);
if (thr == first && thr != NULL)
if (thr == td)
thr = TAILQ_NEXT(thr, td_plist);
} while (thr != NULL);
}
notesz = *off - noteoff;
@@ -1266,7 +1264,7 @@ __elfN(puthdr)(struct proc *p, void *dst, size_t *off, int numsegs)
/* All the writable segments from the program. */
phc.phdr = phdr;
phc.offset = *off;
each_writable_segment(p, cb_put_phdr, &phc);
each_writable_segment(td, cb_put_phdr, &phc);
}
}

sys/kern/kern_fork.c

@@ -493,7 +493,6 @@ fork1(td, flags, pages, procp)
(unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
#undef RANGEOF
td2->td_tid = p2->p_pid;
td2->td_sigstk = td->td_sigstk;
/* Set up the thread as an active thread (as if runnable). */

sys/kern/kern_kse.c

@@ -872,10 +872,8 @@ thread_alloc_spare(struct thread *td, struct thread *spare)
if (td->td_standin)
return;
if (spare == NULL) {
if (spare == NULL)
spare = thread_alloc();
spare->td_tid = thread_new_tid();
}
td->td_standin = spare;
bzero(&spare->td_startzero,
(unsigned)RANGEOF(struct thread, td_startzero, td_endzero));

sys/kern/kern_thr.c

@@ -130,7 +130,6 @@ thr_create(struct thread *td, struct thr_create_args *uap)
/* Initialize our td. */
td0 = thread_alloc();
td0->td_tid = thread_new_tid();
/*
* Try the copyout as soon as we allocate the td so we don't have to

sys/kern/kern_thread.c

@@ -149,7 +149,6 @@ thread_ctor(void *mem, int size, void *arg)
struct thread *td;
td = (struct thread *)mem;
td->td_tid = 0;
td->td_state = TDS_INACTIVE;
td->td_oncpu = NOCPU;
@@ -175,28 +174,9 @@ static void
thread_dtor(void *mem, int size, void *arg)
{
struct thread *td;
struct tid_bitmap_part *bmp;
lwpid_t tid;
int bit, idx;
td = (struct thread *)mem;
if (td->td_tid > PID_MAX) {
STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
if (td->td_tid >= bmp->bmp_base &&
td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
break;
}
KASSERT(bmp != NULL, ("No TID bitmap?"));
mtx_lock(&tid_lock);
tid = td->td_tid - bmp->bmp_base;
idx = tid / TID_IDS_PER_IDX;
bit = 1UL << (tid % TID_IDS_PER_IDX);
bmp->bmp_bitmap[idx] |= bit;
bmp->bmp_free++;
mtx_unlock(&tid_lock);
}
#ifdef INVARIANTS
/* Verify that this thread is in a safe state to free. */
switch (td->td_state) {
@@ -225,9 +205,47 @@ thread_dtor(void *mem, int size, void *arg)
static void
thread_init(void *mem, int size)
{
struct thread *td;
struct thread *td;
struct tid_bitmap_part *bmp, *new;
int bit, idx;
td = (struct thread *)mem;
mtx_lock(&tid_lock);
STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
if (bmp->bmp_free)
break;
}
/* Create a new bitmap if we run out of free bits. */
if (bmp == NULL) {
mtx_unlock(&tid_lock);
new = uma_zalloc(tid_zone, M_WAITOK);
mtx_lock(&tid_lock);
bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
/* 1=free, 0=assigned. This way we can use ffsl(). */
memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
new->bmp_base = (bmp == NULL) ? TID_MIN :
bmp->bmp_base + TID_IDS_PER_PART;
new->bmp_free = TID_IDS_PER_PART;
STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
bmp = new;
new = NULL;
}
} else
new = NULL;
/* We have a bitmap with available IDs. */
idx = 0;
while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
idx++;
bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
bmp->bmp_bitmap[idx] &= ~(1UL << bit);
bmp->bmp_free--;
mtx_unlock(&tid_lock);
if (new != NULL)
uma_zfree(tid_zone, new);
vm_thread_new(td, 0);
cpu_thread_setup(td);
td->td_sleepqueue = sleepq_alloc();
@@ -241,12 +259,29 @@ thread_init(void *mem, int size)
static void
thread_fini(void *mem, int size)
{
struct thread *td;
struct thread *td;
struct tid_bitmap_part *bmp;
lwpid_t tid;
int bit, idx;
td = (struct thread *)mem;
turnstile_free(td->td_turnstile);
sleepq_free(td->td_sleepqueue);
vm_thread_dispose(td);
STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
if (td->td_tid >= bmp->bmp_base &&
td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
break;
}
KASSERT(bmp != NULL, ("No TID bitmap?"));
mtx_lock(&tid_lock);
tid = td->td_tid - bmp->bmp_base;
idx = tid / TID_IDS_PER_IDX;
bit = 1UL << (tid % TID_IDS_PER_IDX);
bmp->bmp_bitmap[idx] |= bit;
bmp->bmp_free++;
mtx_unlock(&tid_lock);
}
/*
@@ -535,55 +570,6 @@ thread_free(struct thread *td)
uma_zfree(thread_zone, td);
}
/*
* Assign a thread ID.
*/
lwpid_t
thread_new_tid(void)
{
struct tid_bitmap_part *bmp, *new;
lwpid_t tid;
int bit, idx;
mtx_lock(&tid_lock);
STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
if (bmp->bmp_free)
break;
}
/* Create a new bitmap if we run out of free bits. */
if (bmp == NULL) {
mtx_unlock(&tid_lock);
new = uma_zalloc(tid_zone, M_WAITOK);
mtx_lock(&tid_lock);
bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
/* 1=free, 0=assigned. This way we can use ffsl(). */
memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
new->bmp_base = (bmp == NULL) ? TID_MIN :
bmp->bmp_base + TID_IDS_PER_PART;
new->bmp_free = TID_IDS_PER_PART;
STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
bmp = new;
new = NULL;
}
} else
new = NULL;
/* We have a bitmap with available IDs. */
idx = 0;
while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
idx++;
bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
bmp->bmp_bitmap[idx] &= ~(1UL << bit);
bmp->bmp_free--;
mtx_unlock(&tid_lock);
if (new != NULL)
uma_zfree(tid_zone, new);
return (tid);
}
/*
* Discard the current thread and exit from its context.
* Always called with scheduler locked.

sys/sys/proc.h

@@ -903,7 +903,6 @@ void thread_exit(void) __dead2;
int thread_export_context(struct thread *td, int willexit);
void thread_free(struct thread *td);
void thread_link(struct thread *td, struct ksegrp *kg);
lwpid_t thread_new_tid(void);
void thread_reap(void);
struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku);
int thread_single(int how);