Improve the lcall $7,$0 syscall emulation on amd64.
The current code, which copies the potential syscall arguments into the current frame, puts an arbitrary limit on the number of syscall arguments. Apparently, mmap(2) and lseek(2) (?) require a larger number. But there is an issue: the stack only needs to be mapped far enough to contain the number of arguments required by the syscall, so copying an arbitrarily large number of words from the stack is not completely safe. Use a different approach and convert the lcall frame into an int $0x80 frame in place, by doing the retl in the kernel. This also allows us to stop handling the vfork case specially, and to stop making assumptions about %cs at syscall time. Also, improve the comments with the formulations provided by bde. Reviewed and tested by: bde Sponsored by: The FreeBSD Foundation MFC after: 1 week
This commit is contained in:
parent
e24e568336
commit
a37d4032ed
@ -78,44 +78,23 @@ ia32_osigcode:
|
||||
1:
|
||||
jmp 1b
|
||||
|
||||
|
||||
/*
|
||||
* The lcall $7,$0 emulator cannot use the call gate that does an
|
||||
* inter-privilege transition. The reason is that the call gate
|
||||
* does not disable interrupts, and, before the swapgs is
|
||||
* executed, we would have a window where the ring 0 code is
|
||||
* executed with the wrong gsbase.
|
||||
* Our lcall $7,$0 handler remains in user mode (ring 3), since lcalls
|
||||
* don't change the interrupt mask, so if this one went directly to the
|
||||
* kernel then there would be a window with interrupts enabled in kernel
|
||||
* mode, and all interrupt handlers would have to be almost as complicated
|
||||
* as the NMI handler to support this.
|
||||
*
|
||||
* Instead, set LDT descriptor 0 as code segment, which reflects
|
||||
* the lcall $7,$0 back to ring 3 trampoline. The trampoline sets up
|
||||
* the frame for int $0x80.
|
||||
* Instead, convert the lcall to an int0x80 call. The kernel does most
|
||||
* of the conversion by popping the lcall return values off the user
|
||||
* stack and returning to them instead of to here, except when the
|
||||
* conversion itself fails. Adjusting the stack here is impossible for
|
||||
* vfork() and harder for other syscalls.
|
||||
*/
|
||||
ALIGN_TEXT
|
||||
lcall_tramp:
|
||||
cmpl $SYS_vfork,%eax
|
||||
je 1f
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl 0x24(%ebp) /* arg 6 */
|
||||
pushl 0x20(%ebp)
|
||||
pushl 0x1c(%ebp)
|
||||
pushl 0x18(%ebp)
|
||||
pushl 0x14(%ebp)
|
||||
pushl 0x10(%ebp) /* arg 1 */
|
||||
subl $4,%esp /* gap */
|
||||
int $0x80
|
||||
leavel
|
||||
lretl
|
||||
1:
|
||||
/*
|
||||
* vfork handling is special and relies on the libc stub saving
|
||||
* the return ip in %ecx. Also, we assume that the call was done
|
||||
* with ucode32 selector in %cs.
|
||||
*/
|
||||
int $0x80
|
||||
movl $0x33,4(%esp) /* GUCODE32_SEL | SEL_UPL */
|
||||
movl %ecx,(%esp)
|
||||
lretl
|
||||
1: jmp 1b
|
||||
#endif
|
||||
|
||||
ALIGN_TEXT
|
||||
|
@ -116,11 +116,39 @@ ia32_fetch_syscall_args(struct thread *td)
|
||||
caddr_t params;
|
||||
u_int32_t args[8], tmp;
|
||||
int error, i;
|
||||
#ifdef COMPAT_43
|
||||
u_int32_t eip;
|
||||
int cs;
|
||||
#endif
|
||||
|
||||
p = td->td_proc;
|
||||
frame = td->td_frame;
|
||||
sa = &td->td_sa;
|
||||
|
||||
#ifdef COMPAT_43
|
||||
if (__predict_false(frame->tf_cs == 7 && frame->tf_rip == 2)) {
|
||||
/*
|
||||
* In lcall $7,$0 after int $0x80. Convert the user
|
||||
* frame to what it would be for a direct int 0x80 instead
|
||||
* of lcall $7,$0, by popping the lcall return address.
|
||||
*/
|
||||
error = fueword32((void *)frame->tf_rsp, &eip);
|
||||
if (error == -1)
|
||||
return (EFAULT);
|
||||
cs = fuword16((void *)(frame->tf_rsp + sizeof(u_int32_t)));
|
||||
if (cs == -1)
|
||||
return (EFAULT);
|
||||
|
||||
/*
|
||||
* Unwind in-kernel frame after all stack frame pieces
|
||||
* were successfully read.
|
||||
*/
|
||||
frame->tf_rip = eip;
|
||||
frame->tf_cs = cs;
|
||||
frame->tf_rsp += 2 * sizeof(u_int32_t);
|
||||
}
|
||||
#endif
|
||||
|
||||
params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
|
||||
sa->code = frame->tf_rax;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user