amd64: stop using top of the thread's kernel stack for FPU user save area

Instead, do one more allocation at thread creation time.  This frees a
significant amount of space at the top of the kernel stack, which was
previously reserved for the FPU user save area.

Also, do not use alloca() for temporary storage in the signal delivery
function sendsig() and the signal return syscall sys_sigreturn().  This
saves a comparable amount of stack space, again at the cost of one more
allocation at thread creation time.

A useful experiment now would be to reduce KSTACK_PAGES.
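
For anyone trying that experiment, the stack size can be changed either in the
kernel configuration or, without a rebuild, via the loader tunable.  A minimal
sketch, assuming the stock amd64 default of 4 pages (the value 3 is only an
example, not a recommendation from this commit):

	options KSTACK_PAGES=3		# custom kernel configuration file
	kern.kstack_pages=3		# or as a tunable in /boot/loader.conf

The effective value can be checked afterwards with sysctl kern.kstack_pages.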

Reviewed by:	jhb, markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D31954
commit df8dd6025a (parent 0f6829488e)
Konstantin Belousov, 2021-09-14 00:05:47 +03:00
7 changed files with 23 additions and 29 deletions
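
The hunks below call fpu_save_area_alloc() and fpu_save_area_free() but do not
show their definitions.  As a minimal sketch of what such helpers could look
like, assuming a UMA zone sized and aligned for the XSAVE area (the zone-based
approach and every name other than fpu_save_area_alloc()/fpu_save_area_free(),
cpu_max_ext_state_size and XSAVE_AREA_ALIGN are illustrative assumptions, not
part of this commit):

/*
 * Hypothetical sketch: backing the per-thread FPU save areas with a UMA zone.
 */
#include <sys/param.h>
#include <sys/malloc.h>
#include <vm/uma.h>
#include <machine/fpu.h>

static uma_zone_t fpu_save_area_zone;

/* Create the zone once the XSAVE area size (cpu_max_ext_state_size) is known. */
static void
fpu_save_area_zone_init(void)
{
	fpu_save_area_zone = uma_zcreate("fpu_save_area",
	    cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
	    XSAVE_AREA_ALIGN - 1, 0);	/* uma_zcreate() takes an align mask */
}

struct savefpu *
fpu_save_area_alloc(void)
{
	/* Zeroed, sleepable allocation; one item per save area. */
	return (uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO));
}

void
fpu_save_area_free(struct savefpu *fsa)
{
	uma_zfree(fpu_save_area_zone, fsa);
}

cpu_thread_alloc() then stores two such areas in md_usr_fpu_save and
md_fpu_scratch, and cpu_thread_free() releases them, as the hunks below show.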

==== file 1 of 7 ====

@@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-		xfpusave = __builtin_alloca(xfpusave_len);
+		xfpusave = (char *)td->td_md.md_fpu_scratch;
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
@@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
-		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0)

==== file 2 of 7 ====

@@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
 		    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
 	}
 
+	cpu_thread_alloc(&thread0);
+
 	saveintr = intr_disable();
 	stop_emulating();

==== file 3 of 7 ====

@@ -1258,7 +1258,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	caddr_t kmdp;
 	int gsel_tss, x;
 	struct pcpu *pc;
-	struct xstate_hdr *xhdr;
 	uint64_t cr3, rsp0;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
@@ -1564,19 +1563,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	msgbufinit(msgbufp, msgbufsize);
 	fpuinit();
 
-	/*
-	 * Reinitialize thread0's stack base now that the xsave area size is
-	 * known. Set up thread0's pcb save area after fpuinit calculated fpu
-	 * save area size. Zero out the extended state header in fpu save area.
-	 */
-	set_top_of_stack_td(&thread0);
-	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
-	bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
-	if (use_xsave) {
-		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
-		    1);
-		xhdr->xstate_bv = xsave_mask;
-	}
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	rsp0 = thread0.td_md.md_stack_base;
 	/* Ensure the stack is aligned to 16 bytes */

==== file 4 of 7 ====

@@ -90,19 +90,17 @@ void
 set_top_of_stack_td(struct thread *td)
 {
 	td->td_md.md_stack_base = td->td_kstack +
-	    td->td_kstack_pages * PAGE_SIZE -
-	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
+	    td->td_kstack_pages * PAGE_SIZE;
 }
 
 struct savefpu *
 get_pcb_user_save_td(struct thread *td)
 {
-	vm_offset_t p;
-
-	p = td->td_md.md_stack_base;
-	KASSERT((p % XSAVE_AREA_ALIGN) == 0,
-	    ("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
-	return ((struct savefpu *)p);
+	KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
+	    XSAVE_AREA_ALIGN) == 0,
+	    ("Unaligned pcb_user_save area ptr %p td %p",
+	    td->td_md.md_usr_fpu_save, td));
+	return (td->td_md.md_usr_fpu_save);
 }
 
 struct pcb *
@@ -393,6 +391,8 @@ cpu_thread_alloc(struct thread *td)
 	set_top_of_stack_td(td);
 	td->td_pcb = pcb = get_pcb_td(td);
 	td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
+	td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
+	td->td_md.md_fpu_scratch = fpu_save_area_alloc();
 	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
 	if (use_xsave) {
 		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
@@ -404,8 +404,12 @@ cpu_thread_alloc(struct thread *td)
 void
 cpu_thread_free(struct thread *td)
 {
 
 	cpu_thread_clean(td);
+	fpu_save_area_free(td->td_md.md_usr_fpu_save);
+	td->td_md.md_usr_fpu_save = NULL;
+	fpu_save_area_free(td->td_md.md_fpu_scratch);
+	td->td_md.md_fpu_scratch = NULL;
 }
 
 bool

==== file 5 of 7 ====

@@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
-		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0)
@@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-		xfpusave = __builtin_alloca(xfpusave_len);
+		xfpusave = (char *)td->td_md.md_fpu_scratch;
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
@@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
 			    td->td_proc->p_pid, td->td_name, xfpustate_len);
 			return (EINVAL);
 		}
-		xfpustate = __builtin_alloca(xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
 		    xfpustate, xfpustate_len);
 		if (error != 0) {

==== file 6 of 7 ====

@@ -75,6 +75,8 @@ struct mdthread {
 	int md_efirt_dis_pf;	/* (k) */
 	struct pcb md_pcb;
 	vm_offset_t md_stack_base;
+	struct savefpu *md_usr_fpu_save;
+	struct savefpu *md_fpu_scratch;
 };
 
 struct mdproc {

==== file 7 of 7 ====

@@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
     "struct thread KBI td_pflags");
 _Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
     "struct proc KBI p_flag");