amd64: move pcb out of kstack to struct thread.
This saves 320 bytes of the precious stack space. The only negative aspects of the change I can think of are that struct thread obviously grows by 320 bytes, and that those 320 bytes are no longer swapped out. I believe the freed stack space is much more important than that. Also, the current struct thread size is 1392 bytes on amd64, so UMA will allocate two thread structures per (4KB) slab, which leaves space for the pcb without increasing zone memory use. Reviewed by: alc, markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D22138
This commit is contained in:
parent
2fc0413f87
commit
b01d1a3a2f
@ -74,7 +74,7 @@ END(cpu_throw)
|
||||
*/
|
||||
ENTRY(cpu_switch)
|
||||
/* Switch to new thread. First, save context. */
|
||||
movq TD_PCB(%rdi),%r8
|
||||
leaq TD_MD_PCB(%rdi),%r8
|
||||
|
||||
movq (%rsp),%rax /* Hardware registers */
|
||||
movq %r15,PCB_R15(%r8)
|
||||
@ -140,7 +140,7 @@ ctx_switch_xsave:
|
||||
callq pmap_activate_sw
|
||||
movq %r15,TD_LOCK(%r13) /* Release the old thread */
|
||||
sw1:
|
||||
movq TD_PCB(%r12),%r8
|
||||
leaq TD_MD_PCB(%r12),%r8
|
||||
#if defined(SCHED_ULE) && defined(SMP)
|
||||
movq $blocked_lock, %rdx
|
||||
movq TD_LOCK(%r12),%rcx
|
||||
@ -193,11 +193,12 @@ do_kthread:
|
||||
cmpq %rax,%rdx
|
||||
jne do_tss
|
||||
done_tss:
|
||||
movq %r8,PCPU(RSP0)
|
||||
movq TD_MD_STACK_BASE(%r12),%r9
|
||||
movq %r9,PCPU(RSP0)
|
||||
movq %r8,PCPU(CURPCB)
|
||||
movq PCPU(PTI_RSP0),%rax
|
||||
cmpq $~0,PCPU(UCR3)
|
||||
cmove %r8,%rax
|
||||
cmove %r9,%rax
|
||||
movq %rax,TSS_RSP0(%rdx)
|
||||
movq %r12,PCPU(CURTHREAD) /* into next thread */
|
||||
|
||||
|
@ -87,6 +87,8 @@ ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags));
|
||||
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
|
||||
ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
|
||||
ASSYM(TD_MD, offsetof(struct thread, td_md));
|
||||
ASSYM(TD_MD_PCB, offsetof(struct thread, td_md.md_pcb));
|
||||
ASSYM(TD_MD_STACK_BASE, offsetof(struct thread, td_md.md_stack_base));
|
||||
|
||||
ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
|
||||
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
|
||||
|
@ -1789,12 +1789,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
amd64_conf_fast_syscall();
|
||||
|
||||
/*
|
||||
* Temporary forge some valid pointer to PCB, for exception
|
||||
* handlers. It is reinitialized properly below after FPU is
|
||||
* set up. Also set up td_critnest to short-cut the page
|
||||
* fault handler.
|
||||
* We initialize the PCB pointer early so that exception
|
||||
* handlers will work. Also set up td_critnest to short-cut
|
||||
* the page fault handler.
|
||||
*/
|
||||
cpu_max_ext_state_size = sizeof(struct savefpu);
|
||||
set_top_of_stack_td(&thread0);
|
||||
thread0.td_pcb = get_pcb_td(&thread0);
|
||||
thread0.td_critnest = 1;
|
||||
|
||||
@ -1850,11 +1850,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
fpuinit();
|
||||
|
||||
/*
|
||||
* Set up thread0 pcb after fpuinit calculated pcb + fpu save
|
||||
* Set up thread0 pcb save area after fpuinit calculated fpu save
|
||||
* area size. Zero out the extended state header in fpu save
|
||||
* area.
|
||||
*/
|
||||
thread0.td_pcb = get_pcb_td(&thread0);
|
||||
thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
|
||||
bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
|
||||
if (use_xsave) {
|
||||
@ -1863,7 +1862,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
xhdr->xstate_bv = xsave_mask;
|
||||
}
|
||||
/* make an initial tss so cpu can get interrupt stack on syscall! */
|
||||
rsp0 = (vm_offset_t)thread0.td_pcb;
|
||||
rsp0 = thread0.td_md.md_stack_base;
|
||||
/* Ensure the stack is aligned to 16 bytes */
|
||||
rsp0 &= ~0xFul;
|
||||
common_tss[0].tss_rsp0 = rsp0;
|
||||
@ -1899,7 +1898,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
TSEXIT();
|
||||
|
||||
/* Location of kernel stack for locore */
|
||||
return ((u_int64_t)thread0.td_pcb);
|
||||
return (thread0.td_md.md_stack_base);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -87,35 +87,39 @@ __FBSDID("$FreeBSD$");
|
||||
_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
|
||||
"OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf.");
|
||||
|
||||
void
|
||||
set_top_of_stack_td(struct thread *td)
|
||||
{
|
||||
td->td_md.md_stack_base = td->td_kstack +
|
||||
td->td_kstack_pages * PAGE_SIZE -
|
||||
roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
|
||||
}
|
||||
|
||||
struct savefpu *
|
||||
get_pcb_user_save_td(struct thread *td)
|
||||
{
|
||||
vm_offset_t p;
|
||||
|
||||
p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
|
||||
roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
|
||||
KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area"));
|
||||
return ((struct savefpu *)p);
|
||||
}
|
||||
|
||||
struct savefpu *
|
||||
get_pcb_user_save_pcb(struct pcb *pcb)
|
||||
{
|
||||
vm_offset_t p;
|
||||
|
||||
p = (vm_offset_t)(pcb + 1);
|
||||
p = td->td_md.md_stack_base;
|
||||
KASSERT((p % XSAVE_AREA_ALIGN) == 0,
|
||||
("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
|
||||
return ((struct savefpu *)p);
|
||||
}
|
||||
|
||||
struct pcb *
|
||||
get_pcb_td(struct thread *td)
|
||||
{
|
||||
vm_offset_t p;
|
||||
|
||||
p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
|
||||
roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) -
|
||||
sizeof(struct pcb);
|
||||
return ((struct pcb *)p);
|
||||
return (&td->td_md.md_pcb);
|
||||
}
|
||||
|
||||
struct savefpu *
|
||||
get_pcb_user_save_pcb(struct pcb *pcb)
|
||||
{
|
||||
struct thread *td;
|
||||
|
||||
td = __containerof(pcb, struct thread, td_md.md_pcb);
|
||||
return (get_pcb_user_save_td(td));
|
||||
}
|
||||
|
||||
void *
|
||||
@ -165,9 +169,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
|
||||
fpuexit(td1);
|
||||
update_pcb_bases(td1->td_pcb);
|
||||
|
||||
/* Point the pcb to the top of the stack */
|
||||
pcb2 = get_pcb_td(td2);
|
||||
td2->td_pcb = pcb2;
|
||||
/* Point the stack and pcb to the actual location */
|
||||
set_top_of_stack_td(td2);
|
||||
td2->td_pcb = pcb2 = get_pcb_td(td2);
|
||||
|
||||
/* Copy td1's pcb */
|
||||
bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
|
||||
@ -186,7 +190,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
|
||||
* Copy the trap frame for the return to user mode as if from a
|
||||
* syscall. This copies most of the user mode register values.
|
||||
*/
|
||||
td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
|
||||
td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1;
|
||||
bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
|
||||
|
||||
td2->td_frame->tf_rax = 0; /* Child returns zero */
|
||||
@ -351,8 +355,9 @@ cpu_thread_alloc(struct thread *td)
|
||||
struct pcb *pcb;
|
||||
struct xstate_hdr *xhdr;
|
||||
|
||||
set_top_of_stack_td(td);
|
||||
td->td_pcb = pcb = get_pcb_td(td);
|
||||
td->td_frame = (struct trapframe *)pcb - 1;
|
||||
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
|
||||
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
|
||||
if (use_xsave) {
|
||||
xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
|
||||
@ -490,7 +495,6 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
|
||||
{
|
||||
struct pcb *pcb2;
|
||||
|
||||
/* Point the pcb to the top of the stack. */
|
||||
pcb2 = td->td_pcb;
|
||||
|
||||
/*
|
||||
|
@ -83,6 +83,7 @@ void fpstate_drop(struct thread *td);
|
||||
void pagezero(void *addr);
|
||||
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist);
|
||||
void sse2_pagezero(void *addr);
|
||||
void set_top_of_stack_td(struct thread *td);
|
||||
struct savefpu *get_pcb_user_save_td(struct thread *td);
|
||||
struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
|
||||
void pci_early_quirks(void);
|
||||
|
@ -57,16 +57,6 @@ __curthread(void)
|
||||
return (td);
|
||||
}
|
||||
#define curthread (__curthread())
|
||||
|
||||
static __inline __pure2 struct pcb *
|
||||
__curpcb(void)
|
||||
{
|
||||
struct pcb *pcb;
|
||||
|
||||
__asm("movq %%gs:%P1,%0" : "=r" (pcb) : "n" (offsetof(struct pcpu,
|
||||
pc_curpcb)));
|
||||
return (pcb);
|
||||
}
|
||||
#define curpcb (__curpcb())
|
||||
#define curpcb (&curthread->td_md.md_pcb)
|
||||
|
||||
#endif /* _MACHINE_PCPU_AUX_H_ */
|
||||
|
@ -36,6 +36,7 @@
|
||||
#define _MACHINE_PROC_H_
|
||||
|
||||
#include <sys/queue.h>
|
||||
#include <machine/pcb.h>
|
||||
#include <machine/segments.h>
|
||||
|
||||
/*
|
||||
@ -72,6 +73,8 @@ struct mdthread {
|
||||
struct pmap_invl_gen md_invl_gen;
|
||||
register_t md_efirt_tmp; /* (k) */
|
||||
int md_efirt_dis_pf; /* (k) */
|
||||
struct pcb md_pcb;
|
||||
vm_offset_t md_stack_base;
|
||||
};
|
||||
|
||||
struct mdproc {
|
||||
|
@ -84,7 +84,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
|
||||
"struct thread KBI td_pflags");
|
||||
_Static_assert(offsetof(struct thread, td_frame) == 0x478,
|
||||
"struct thread KBI td_frame");
|
||||
_Static_assert(offsetof(struct thread, td_emuldata) == 0x540,
|
||||
_Static_assert(offsetof(struct thread, td_emuldata) == 0x690,
|
||||
"struct thread KBI td_emuldata");
|
||||
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
|
||||
"struct proc KBI p_flag");
|
||||
|
Loading…
x
Reference in New Issue
Block a user