Add FPU_KERN_NOCTX flag to the fpu_kern_enter() function on amd64.

The flag specifies that the block which uses the FPU must be executed in
a critical section, i.e. without taking context switches, and does not
need an FPU save area during the execution.

It is intended to be applied around fast and short code paths where
save area allocation is impossible or undesirable, due to context or
due to the relative cost of calculation vs. allocation.

Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
This commit is contained in:
Konstantin Belousov 2016-09-11 09:14:07 +00:00
parent f14f55d622
commit cf1c47763f
4 changed files with 69 additions and 14 deletions

View File

@ -120,6 +120,16 @@ could be used from both kernel thread and syscall contexts.
The The
.Fn fpu_kern_leave .Fn fpu_kern_leave
function correctly handles such contexts. function correctly handles such contexts.
.It Dv FPU_KERN_NOCTX
Avoid nesting save area.
If the flag is specified, the
.Fa ctx
must be passed as
.Va NULL .
The flag should only be used for really short code blocks
which can be executed in a critical section.
It avoids the need to allocate the FPU context at the cost
of increased system latency.
.El .El
.El .El
.Pp .Pp

View File

@ -633,6 +633,8 @@ fpudna(void)
*/ */
critical_enter(); critical_enter();
KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
if (PCPU_GET(fpcurthread) == curthread) { if (PCPU_GET(fpcurthread) == curthread) {
printf("fpudna: fpcurthread == curthread\n"); printf("fpudna: fpcurthread == curthread\n");
stop_emulating(); stop_emulating();
@ -964,13 +966,39 @@ fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{ {
struct pcb *pcb; struct pcb *pcb;
KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); pcb = td->td_pcb;
KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
("ctx is required when !FPU_KERN_NOCTX"));
KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
("using inuse ctx"));
KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0,
("recursive fpu_kern_enter while in PCB_FPUNOSAVE state"));
if ((flags & FPU_KERN_NOCTX) != 0) {
critical_enter();
stop_emulating();
if (curthread == PCPU_GET(fpcurthread)) {
fpusave(curpcb->pcb_save);
PCPU_SET(fpcurthread, NULL);
} else {
KASSERT(PCPU_GET(fpcurthread) == NULL,
("invalid fpcurthread"));
}
/*
* This breaks XSAVEOPT tracker, but
* PCB_FPUNOSAVE state is supposed to never need to
* save FPU context at all.
*/
fpurestore(fpu_initialstate);
set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE |
PCB_FPUINITDONE);
return (0);
}
if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
return (0); return (0);
} }
pcb = td->td_pcb;
KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
ctx->flags = FPU_KERN_CTX_INUSE; ctx->flags = FPU_KERN_CTX_INUSE;
@ -989,19 +1017,34 @@ fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{ {
struct pcb *pcb; struct pcb *pcb;
KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
("leaving not inuse ctx"));
ctx->flags &= ~FPU_KERN_CTX_INUSE;
if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
return (0);
KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
pcb = td->td_pcb; pcb = td->td_pcb;
critical_enter();
if (curthread == PCPU_GET(fpcurthread)) if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) {
fpudrop(); KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
critical_exit(); KASSERT(PCPU_GET(fpcurthread) == NULL,
pcb->pcb_save = ctx->prev; ("non-NULL fpcurthread for PCB_FPUNOSAVE"));
CRITICAL_ASSERT(td);
clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE);
start_emulating();
critical_exit();
} else {
KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
("leaving not inuse ctx"));
ctx->flags &= ~FPU_KERN_CTX_INUSE;
if (is_fpu_kern_thread(0) &&
(ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
return (0);
KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0,
("dummy ctx"));
critical_enter();
if (curthread == PCPU_GET(fpcurthread))
fpudrop();
critical_exit();
pcb->pcb_save = ctx->prev;
}
if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
set_pcb_flags(pcb, PCB_FPUINITDONE); set_pcb_flags(pcb, PCB_FPUINITDONE);

View File

@ -86,6 +86,7 @@ void fpu_save_area_reset(struct savefpu *fsa);
#define FPU_KERN_NORMAL 0x0000 #define FPU_KERN_NORMAL 0x0000
#define FPU_KERN_NOWAIT 0x0001 #define FPU_KERN_NOWAIT 0x0001
#define FPU_KERN_KTHR 0x0002 #define FPU_KERN_KTHR 0x0002
#define FPU_KERN_NOCTX 0x0004
#endif #endif

View File

@ -83,6 +83,7 @@ struct pcb {
#define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */
#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */
#define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */
#define PCB_FPUNOSAVE 0x80 /* no save area for current FPU ctx */
uint16_t pcb_initial_fpucw; uint16_t pcb_initial_fpucw;