Change the way the lcall $7,$0 is reflected to usermode.  Instead of
setting up a call gate, which must be 64-bit, put a code segment
descriptor into LDT slot 0.

This way, the syscall shim does not temporarily switch to a 64-bit
trampoline, and does not create a window where signal delivery
interrupts 64-bit mode (from which the signal handler cannot return).
The cost is that the shim runs with a non-zero-based segment in %cs,
which requires the vfork() handling to make more assumptions.

Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
This commit is contained in:
Konstantin Belousov 2014-12-27 23:19:08 +00:00
parent dcf83ff0e9
commit 4cc6942f37
2 changed files with 25 additions and 57 deletions

View File

@ -86,34 +86,14 @@ ia32_osigcode:
* executed, we would have a window where the ring 0 code is
* executed with the wrong gsbase.
*
* Instead, reflect the lcall $7,$0 back to ring 3 trampoline
* which sets up the frame for int $0x80.
* Instead, set LDT descriptor 0 as code segment, which reflects
* the lcall $7,$0 back to ring 3 trampoline. The trampoline sets up
* the frame for int $0x80.
*/
ALIGN_TEXT
lcall_tramp:
.code64
/*
* There, we are in 64bit mode and need to return to 32bit.
* First, convert call frame from 64 to 32 bit format.
*/
pushq %rax
movl 16(%rsp),%eax
movl %eax,20(%rsp) /* ret %cs */
movl 8(%rsp),%eax
movl %eax,16(%rsp) /* ret %rip -> %eip */
popq %rax
addq $8,%rsp
/* Now return to 32bit */
pushq $0x33 /* _ucode32sel UPL */
callq 1f
1:
addq $2f-1b,(%rsp)
lretq
2:
/* Back in 32bit mode */
.code32
cmpl $SYS_vfork,%eax
je 4f
je 1f
pushl %ebp
movl %esp,%ebp
pushl 0x24(%ebp) /* arg 6 */
@ -122,21 +102,20 @@ lcall_tramp:
pushl 0x18(%ebp)
pushl 0x14(%ebp)
pushl 0x10(%ebp) /* arg 1 */
pushl 0xc(%ebp) /* gap */
subl $4,%esp /* gap */
int $0x80
leavel
3:
lretl
4:
1:
/*
* vfork handling is special and relies on the libc stub saving
* the return ip in %ecx. If vfork failed, then there is no
* child which can corrupt the frame created by call gate.
* the return ip in %ecx. Also, we assume that the call was done
* with ucode32 selector in %cs.
*/
int $0x80
jb 3b
addl $8,%esp
jmpl *%ecx
movl $0x33,4(%esp) /* GUCODE32_SEL | SEL_UPL */
movl %ecx,(%esp)
lretl
#endif
ALIGN_TEXT

View File

@ -223,39 +223,28 @@ int
setup_lcall_gate(void)
{
struct i386_ldt_args uap;
struct user_segment_descriptor descs[2];
struct gate_descriptor *ssd;
struct user_segment_descriptor desc;
uint32_t lcall_addr;
int error;
bzero(&uap, sizeof(uap));
uap.start = 0;
uap.num = 2;
/*
* This is the easiest way to cut the space for system
* descriptor in ldt. Manually adjust the descriptor type to
* the call gate later.
*/
bzero(&descs[0], sizeof(descs));
descs[0].sd_type = SDT_SYSNULL;
descs[1].sd_type = SDT_SYSNULL;
error = amd64_set_ldt(curthread, &uap, descs);
uap.num = 1;
lcall_addr = curproc->p_sysent->sv_psstrings - sz_lcall_tramp;
bzero(&desc, sizeof(desc));
desc.sd_type = SDT_MEMERA;
desc.sd_dpl = SEL_UPL;
desc.sd_p = 1;
desc.sd_def32 = 1;
desc.sd_gran = 1;
desc.sd_lolimit = 0xffff;
desc.sd_hilimit = 0xf;
desc.sd_lobase = lcall_addr;
desc.sd_hibase = lcall_addr >> 24;
error = amd64_set_ldt(curthread, &uap, &desc);
if (error != 0)
return (error);
lcall_addr = curproc->p_sysent->sv_psstrings - sz_lcall_tramp;
mtx_lock(&dt_lock);
ssd = (struct gate_descriptor *)(curproc->p_md.md_ldt->ldt_base);
bzero(ssd, sizeof(*ssd));
ssd->gd_looffset = lcall_addr;
ssd->gd_hioffset = lcall_addr >> 16;
ssd->gd_selector = _ucodesel;
ssd->gd_type = SDT_SYSCGT;
ssd->gd_dpl = SEL_UPL;
ssd->gd_p = 1;
mtx_unlock(&dt_lock);
return (0);
}
#endif