Add a lwpid field to the per-cpu structure; the lwpid represents the current

running thread's id on each cpu. This allows us to add in-kernel adaptive
spinning for user-level mutexes. While spinning in user space is possible,
without the correct thread running state exported from the kernel it can hardly
be implemented efficiently without wasting cpu cycles; however,
exporting the thread running state is unlikely to be implemented soon, as
the interfaces have to be designed and stabilized first. This implementation is
transparent to user space, and it can be disabled dynamically. With this
change, the performance of a mutex ping-pong program improves massively on
SMP machines. Performance of the mysql super-smack select benchmark increases
by about 7% on an Intel dual dual-core2 Xeon machine, which indicates that on
systems which have a bunch of cpus and low system-call overhead (athlon64, opteron,
and core-2 are known to be fast), the adaptive spin does help performance.

Added sysctls:
    kern.threads.umtx_dflt_spins
        if the sysctl value is non-zero, a zero umutex.m_spincount will
        cause the sysctl value to be used as the spin cycle count.
    kern.threads.umtx_max_spins
        the sysctl sets the upper limit of the spin cycle count.

Tested on: Athlon64 X2 3800+, Dual Xeon 5130
This commit is contained in:
David Xu 2006-12-20 04:40:39 +00:00
parent cd1b20d58a
commit 4e32b7b3cc
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=165369
10 changed files with 89 additions and 4 deletions

View File

@ -206,6 +206,11 @@ sw1:
movq %rbx, (%rax)
movq %rbx, PCPU(RSP0)
movl TD_TID(%rsi), %eax
movq %r8, PCPU(CURPCB)
movl %eax, PCPU(CURTID)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
/* Restore context. */
movq PCB_RBX(%r8),%rbx
movq PCB_RSP(%r8),%rsp
@ -217,9 +222,6 @@ sw1:
movq PCB_RIP(%r8),%rax
movq %rax,(%rsp)
movq %r8, PCPU(CURPCB)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
jz 1f

View File

@ -79,6 +79,7 @@ ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_TID, offsetof(struct thread, td_tid));
ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
@ -190,6 +191,7 @@ ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
ASSYM(PC_CURTID, offsetof(struct pcpu, pc_curtid));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));

View File

@ -1164,6 +1164,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
PCPU_SET(curtid, thread0.td_tid);
PCPU_SET(tssp, &common_tss[0]);
/*

View File

@ -84,6 +84,7 @@ ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_MD, offsetof(struct thread, td_md));
ASSYM(TD_TID, offsetof(struct thread, td_tid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@ -198,6 +199,7 @@ ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
ASSYM(PC_CURTID, offsetof(struct pcpu, pc_curtid));
#ifdef DEV_APIC
ASSYM(LA_VER, offsetof(struct LAPIC, version));

View File

@ -2107,6 +2107,7 @@ init386(first)
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
PCPU_SET(curtid, thread0.td_tid);
/*
* Initialize mutexes.

View File

@ -245,7 +245,9 @@ sw1:
popfl
movl %edx, PCPU(CURPCB)
movl TD_TID(%ecx),%eax
movl %ecx, PCPU(CURTHREAD) /* into next thread */
movl %eax, PCPU(CURTID)
/*
* Determine the LDT to use and load it if is the default one and

View File

@ -371,6 +371,7 @@ cpu_switch(struct thread *old, struct thread *new)
oldpcb->pcb_current_pmap =
pmap_switch(newpcb->pcb_current_pmap);
PCPU_SET(curthread, new);
PCPU_SET(curtid, new->td_tid);
#ifdef COMPAT_IA32
ia32_restorectx(newpcb);
#endif
@ -391,6 +392,7 @@ cpu_throw(struct thread *old __unused, struct thread *new)
newpcb = new->td_pcb;
(void)pmap_switch(newpcb->pcb_current_pmap);
PCPU_SET(curthread, new);
PCPU_SET(curtid, new->td_tid);
#ifdef COMPAT_IA32
ia32_restorectx(newpcb);
#endif
@ -609,6 +611,7 @@ ia64_init(void)
ia64_set_k4((u_int64_t)pcpup);
pcpu_init(pcpup, 0, sizeof(pcpu0));
PCPU_SET(curthread, &thread0);
PCPU_SET(curtid, thread0.td_tid);
/*
* Initialize the console before we print anything out.

View File

@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <machine/cpu.h>
#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif
@ -190,6 +193,13 @@ static int umtx_pi_allocated;
SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
&umtx_pi_allocated, 0, "Allocated umtx_pi");
SYSCTL_DECL(_kern_threads);
static int umtx_dflt_spins = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_dflt_spins, CTLFLAG_RW,
&umtx_dflt_spins, 0, "default umtx spin count");
static int umtx_max_spins = 3000;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_max_spins, CTLFLAG_RW,
&umtx_max_spins, 0, "max umtx spin count");
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
@ -1012,16 +1022,33 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
{
struct umtx_q *uq;
uint32_t owner, old, id;
#ifdef SMP
int spincount;
#endif
int error = 0;
id = td->td_tid;
uq = td->td_umtxq;
#ifdef SMP
if (smp_cpus > 1) {
spincount = fuword32(&m->m_spincount);
if (spincount == 0)
spincount = umtx_dflt_spins;
if (spincount > umtx_max_spins)
spincount = umtx_max_spins;
} else
spincount = 0;
#endif
/*
* Care must be exercised when dealing with umtx structure. It
* can fault on any access.
*/
for (;;) {
#ifdef SMP
try_unowned:
#endif
/*
* Try the uncontested case. This should be done in userland.
*/
@ -1037,6 +1064,9 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMUTEX_CONTESTED) {
#ifdef SMP
try_contested:
#endif
owner = casuword32(&m->m_owner,
UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
@ -1058,6 +1088,46 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
if (try != 0)
return (EBUSY);
#ifdef SMP
if (spincount > 0 && (owner & ~UMUTEX_CONTESTED) != id) {
int i, found = 0;
struct pcpu *pcpu = NULL;
/* Look for a cpu the owner is running on */
for (i = 0; i < MAXCPU; i++) {
if (CPU_ABSENT(i))
continue;
pcpu = pcpu_find(i);
if ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid) {
found = 1;
break;
}
}
if (__predict_false(!found))
goto end_spin;
while ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid &&
(owner & ~UMUTEX_CONTESTED) != id) {
if (--spincount <= 0)
break;
if ((td->td_flags &
(TDF_NEEDRESCHED|TDF_ASTPENDING|TDF_NEEDSIGCHK)) ||
P_SHOULDSTOP(td->td_proc))
break;
owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
if (owner == UMUTEX_UNOWNED)
goto try_unowned;
if (owner == UMUTEX_CONTESTED)
goto try_contested;
cpu_spinwait();
}
}
end_spin:
spincount = 0;
#endif
/*
* If we caught a signal, we have retried and now
* exit immediately.

View File

@ -74,6 +74,7 @@ struct pcpu {
PCPU_MD_FIELDS;
struct vmmeter pc_cnt; /* VM stats counters */
struct device *pc_device;
lwpid_t pc_curtid;
};
SLIST_HEAD(cpuhead, pcpu);

View File

@ -57,7 +57,8 @@ struct umutex {
volatile __lwpid_t m_owner; /* Owner of the mutex */
uint32_t m_flags; /* Flags of the mutex */
uint32_t m_ceilings[2]; /* Priority protect ceiling */
uint32_t m_spare[4]; /* Spare space */
uint32_t m_spincount; /* Max spinning cycle */
uint32_t m_spare[3];
};
struct ucond {