From bd9e0f5df681da8b5ef05a587b4b5b07572d3fc2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 15 Sep 2021 21:37:47 +0300 Subject: [PATCH] amd64: eliminate td_md.md_fpu_scratch For signal send, copyout from the user FPU save area directly. For sigreturn, we are in sleepable context and can do temporal allocation of the transient save area. We cannot copying from userspace directly to user save area because XSAVE state needs to be validated, also partial copyins can corrupt it. Requested by: jhb Reviewed by: jhb, markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D31954 --- sys/amd64/amd64/exec_machdep.c | 47 +++++++++++++++------------------- sys/amd64/amd64/vm_machdep.c | 3 --- sys/amd64/ia32/ia32_signal.c | 43 +++++++++++++------------------ sys/amd64/include/proc.h | 1 - sys/kern/kern_thread.c | 2 +- 5 files changed, 40 insertions(+), 56 deletions(-) diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c index 48bda05f9685..168c24cbb65b 100644 --- a/sys/amd64/amd64/exec_machdep.c +++ b/sys/amd64/amd64/exec_machdep.c @@ -96,7 +96,7 @@ __FBSDID("$FreeBSD$"); #include static void get_fpcontext(struct thread *td, mcontext_t *mcp, - char *xfpusave, size_t xfpusave_len); + char **xfpusave, size_t *xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); @@ -133,14 +133,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); - xfpusave = (char *)td->td_md.md_fpu_scratch; - } else { - xfpusave_len = 0; - xfpusave = NULL; - } - /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -150,7 +142,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); + get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len); fpstate_drop(td); update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; @@ -301,10 +293,11 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap) p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } - xfpustate = __builtin_alloca(xfpustate_len); + xfpustate = (char *)fpu_save_area_alloc(); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { + fpu_save_area_free((struct savefpu *)xfpustate); uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); @@ -315,6 +308,7 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap) xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); + fpu_save_area_free((struct savefpu *)xfpustate); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); @@ -674,14 +668,17 @@ set_mcontext(struct thread *td, mcontext_t *mcp) if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); - xfpustate = (char *)td->td_md.md_fpu_scratch; + xfpustate = (char *)fpu_save_area_alloc(); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); - if (ret != 0) + if (ret != 0) { + fpu_save_area_free((struct savefpu *)xfpustate); return (ret); + } } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); + fpu_save_area_free((struct savefpu *)xfpustate); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; @@ -719,26 +716,24 @@ set_mcontext(struct thread *td, mcontext_t *mcp) } static void -get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, - size_t xfpusave_len) +get_fpcontext(struct thread *td, mcontext_t *mcp, char **xfpusave, + size_t *xfpusave_len) { - size_t max_len, len; - mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); - if (!use_xsave || xfpusave_len == 0) + if (xfpusave == NULL) return; - max_len = cpu_max_ext_state_size - sizeof(struct savefpu); - len = xfpusave_len; - if (len > max_len) { - len = max_len; - bzero(xfpusave + max_len, len - max_len); + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { + *xfpusave_len = 0; + *xfpusave = NULL; + } else { + mcp->mc_flags |= _MC_HASFPXSTATE; + *xfpusave_len = mcp->mc_xfpustate_len = + cpu_max_ext_state_size - sizeof(struct savefpu); + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); } - mcp->mc_flags |= _MC_HASFPXSTATE; - mcp->mc_xfpustate_len = len; - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index e42d16d61b3a..0bfcd03d6655 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -392,7 +392,6 @@ cpu_thread_alloc(struct thread *td) td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); - td->td_md.md_fpu_scratch = fpu_save_area_alloc(); pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); @@ -408,8 +407,6 @@ cpu_thread_free(struct thread *td) fpu_save_area_free(td->td_md.md_usr_fpu_save); td->td_md.md_usr_fpu_save = NULL; - fpu_save_area_free(td->td_md.md_fpu_scratch); - td->td_md.md_fpu_scratch = NULL; } bool diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index 9b67c7001a87..1ca19072a1dc 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -87,10 +87,8 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *); static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, - char *xfpusave, size_t xfpusave_len) + char **xfpusave, size_t *xfpusave_len) { - size_t max_len, len; - /* * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE * in 32bit mode saves %cs and %ds, while on 64bit it saves @@ -101,17 +99,15 @@ ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); - if (!use_xsave || xfpusave_len == 0) - return; - max_len = cpu_max_ext_state_size - sizeof(struct savefpu); - len = xfpusave_len; - if (len > max_len) { - len = max_len; - bzero(xfpusave + max_len, len - max_len); + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { + *xfpusave_len = 0; + *xfpusave = NULL; + } else { + mcp->mc_flags |= _MC_IA32_HASFPXSTATE; + *xfpusave_len = mcp->mc_xfpustate_len = + cpu_max_ext_state_size - sizeof(struct savefpu); + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); } - mcp->mc_flags |= _MC_IA32_HASFPXSTATE; - mcp->mc_xfpustate_len = len; - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int @@ -210,14 +206,17 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp) if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); - xfpustate = (char *)td->td_md.md_fpu_scratch; + xfpustate = (char *)fpu_save_area_alloc(); ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, mcp->mc_xfpustate_len); - if (ret != 0) + if (ret != 0) { + fpu_save_area_free((struct savefpu *)xfpustate); return (ret); + } } else xfpustate = NULL; ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); + fpu_save_area_free((struct savefpu *)xfpustate); if (ret != 0) return (ret); tp->tf_gs = mcp->mc_gs; @@ -577,14 +576,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); - xfpusave = (char *)td->td_md.md_fpu_scratch; - } else { - xfpusave_len = 0; - xfpusave = NULL; - } - /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -613,7 +604,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); + ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; @@ -882,10 +873,11 @@ freebsd32_sigreturn(td, uap) td->td_proc->p_pid, td->td_name, xfpustate_len); return (EINVAL); } - xfpustate = (char *)td->td_md.md_fpu_scratch; + xfpustate = (char *)fpu_save_area_alloc(); error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), xfpustate, xfpustate_len); if (error != 0) { + fpu_save_area_free((struct savefpu *)xfpustate); uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", td->td_proc->p_pid, td->td_name); @@ -897,6 +889,7 @@ freebsd32_sigreturn(td, uap) } ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); + fpu_save_area_free((struct savefpu *)xfpustate); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", td->td_proc->p_pid, td->td_name, ret); diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index bd07f70f8d44..94ac69d31e65 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -76,7 +76,6 @@ struct mdthread { struct pcb md_pcb; vm_offset_t md_stack_base; struct savefpu *md_usr_fpu_save; - struct savefpu *md_fpu_scratch; }; struct mdproc { diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 62f939406374..65c5cc65c87e 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110, "struct thread KBI td_pflags"); _Static_assert(offsetof(struct thread, td_frame) == 0x4a8, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0xb8, "struct proc KBI p_flag");