amd64: stop using top of the thread' kernel stack for FPU user save area
Instead do one more allocation at the thread creation time. This frees a lot of space on the stack. Also do not use alloca() for temporal storage in signal delivery sendsig() function and signal return syscall sys_sigreturn(). This saves equal amount of space, again by the cost of one more allocation at the thread creation time. A useful experiment now would be to reduce KSTACK_PAGES. Reviewed by: jhb, markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D31954
This commit is contained in:
parent
0f6829488e
commit
df8dd6025a
@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
|
||||
if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
|
||||
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
|
||||
xfpusave = __builtin_alloca(xfpusave_len);
|
||||
xfpusave = (char *)td->td_md.md_fpu_scratch;
|
||||
} else {
|
||||
xfpusave_len = 0;
|
||||
xfpusave = NULL;
|
||||
@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
|
||||
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
|
||||
sizeof(struct savefpu))
|
||||
return (EINVAL);
|
||||
xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
|
||||
xfpustate = (char *)td->td_md.md_fpu_scratch;
|
||||
ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
|
||||
mcp->mc_xfpustate_len);
|
||||
if (ret != 0)
|
||||
|
@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
|
||||
xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
|
||||
}
|
||||
|
||||
cpu_thread_alloc(&thread0);
|
||||
|
||||
saveintr = intr_disable();
|
||||
stop_emulating();
|
||||
|
||||
|
@ -1258,7 +1258,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
caddr_t kmdp;
|
||||
int gsel_tss, x;
|
||||
struct pcpu *pc;
|
||||
struct xstate_hdr *xhdr;
|
||||
uint64_t cr3, rsp0;
|
||||
pml4_entry_t *pml4e;
|
||||
pdp_entry_t *pdpe;
|
||||
@ -1564,19 +1563,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
msgbufinit(msgbufp, msgbufsize);
|
||||
fpuinit();
|
||||
|
||||
/*
|
||||
* Reinitialize thread0's stack base now that the xsave area size is
|
||||
* known. Set up thread0's pcb save area after fpuinit calculated fpu
|
||||
* save area size. Zero out the extended state header in fpu save area.
|
||||
*/
|
||||
set_top_of_stack_td(&thread0);
|
||||
thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
|
||||
bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
|
||||
if (use_xsave) {
|
||||
xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
|
||||
1);
|
||||
xhdr->xstate_bv = xsave_mask;
|
||||
}
|
||||
/* make an initial tss so cpu can get interrupt stack on syscall! */
|
||||
rsp0 = thread0.td_md.md_stack_base;
|
||||
/* Ensure the stack is aligned to 16 bytes */
|
||||
|
@ -90,19 +90,17 @@ void
|
||||
set_top_of_stack_td(struct thread *td)
|
||||
{
|
||||
td->td_md.md_stack_base = td->td_kstack +
|
||||
td->td_kstack_pages * PAGE_SIZE -
|
||||
roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
|
||||
td->td_kstack_pages * PAGE_SIZE;
|
||||
}
|
||||
|
||||
struct savefpu *
|
||||
get_pcb_user_save_td(struct thread *td)
|
||||
{
|
||||
vm_offset_t p;
|
||||
|
||||
p = td->td_md.md_stack_base;
|
||||
KASSERT((p % XSAVE_AREA_ALIGN) == 0,
|
||||
("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
|
||||
return ((struct savefpu *)p);
|
||||
KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
|
||||
XSAVE_AREA_ALIGN) == 0,
|
||||
("Unaligned pcb_user_save area ptr %p td %p",
|
||||
td->td_md.md_usr_fpu_save, td));
|
||||
return (td->td_md.md_usr_fpu_save);
|
||||
}
|
||||
|
||||
struct pcb *
|
||||
@ -393,6 +391,8 @@ cpu_thread_alloc(struct thread *td)
|
||||
set_top_of_stack_td(td);
|
||||
td->td_pcb = pcb = get_pcb_td(td);
|
||||
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
|
||||
td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
|
||||
td->td_md.md_fpu_scratch = fpu_save_area_alloc();
|
||||
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
|
||||
if (use_xsave) {
|
||||
xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
|
||||
@ -404,8 +404,12 @@ cpu_thread_alloc(struct thread *td)
|
||||
void
|
||||
cpu_thread_free(struct thread *td)
|
||||
{
|
||||
|
||||
cpu_thread_clean(td);
|
||||
|
||||
fpu_save_area_free(td->td_md.md_usr_fpu_save);
|
||||
td->td_md.md_usr_fpu_save = NULL;
|
||||
fpu_save_area_free(td->td_md.md_fpu_scratch);
|
||||
td->td_md.md_fpu_scratch = NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
|
||||
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
|
||||
sizeof(struct savefpu))
|
||||
return (EINVAL);
|
||||
xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
|
||||
xfpustate = (char *)td->td_md.md_fpu_scratch;
|
||||
ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
|
||||
mcp->mc_xfpustate_len);
|
||||
if (ret != 0)
|
||||
@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
|
||||
if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
|
||||
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
|
||||
xfpusave = __builtin_alloca(xfpusave_len);
|
||||
xfpusave = (char *)td->td_md.md_fpu_scratch;
|
||||
} else {
|
||||
xfpusave_len = 0;
|
||||
xfpusave = NULL;
|
||||
@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
|
||||
td->td_proc->p_pid, td->td_name, xfpustate_len);
|
||||
return (EINVAL);
|
||||
}
|
||||
xfpustate = __builtin_alloca(xfpustate_len);
|
||||
xfpustate = (char *)td->td_md.md_fpu_scratch;
|
||||
error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
|
||||
xfpustate, xfpustate_len);
|
||||
if (error != 0) {
|
||||
|
@ -75,6 +75,8 @@ struct mdthread {
|
||||
int md_efirt_dis_pf; /* (k) */
|
||||
struct pcb md_pcb;
|
||||
vm_offset_t md_stack_base;
|
||||
struct savefpu *md_usr_fpu_save;
|
||||
struct savefpu *md_fpu_scratch;
|
||||
};
|
||||
|
||||
struct mdproc {
|
||||
|
@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
|
||||
"struct thread KBI td_pflags");
|
||||
_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
|
||||
"struct thread KBI td_frame");
|
||||
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
|
||||
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
|
||||
"struct thread KBI td_emuldata");
|
||||
_Static_assert(offsetof(struct proc, p_flag) == 0xb8,
|
||||
"struct proc KBI p_flag");
|
||||
|
Loading…
Reference in New Issue
Block a user