Do not read sigfastblock word on syscall entry.
On machines with SMAP, fueword executes two serializing instructions which can be seen in microbenchmarks. As a measure to restore microbenchmark numbers, only read the word on the attempt to deliver signal in ast(). If the word is set, signal is not delivered and word is kept, preventing interruption of interruptible sleeps by signals until userspace calls sigfastblock(UNBLOCK) which clears the word. This way, the spurious EINTR that userspace can see while in critical section is on first interruptible sleep, if a signal is pending, and on signal posting. It is believed that it is not important for rtld and lbithr critical sections. It might be visible for the application code e.g. for the callback of dl_iterate_phdr(3), but again the belief is that the non-compliance is acceptable. Most important is that the retry of the sleeping syscall does not interrupt unless additional signal is posted. For now I added the knob kern.sigfastblock_fetch_always to enable the word read on syscall entry to be able to diagnose possible issues due to spurious EINTR. While there, do some code restructuting to have all sigfastblock() handling located in kern_sig.c. Reviewed by: jeff Discussed with: mjg Tested by: pho Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D23622
This commit is contained in:
parent
a1a6c01e41
commit
a113b17f10
@ -1035,9 +1035,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
imgp->vmspace_destroyed = 1;
|
||||
imgp->sysent = sv;
|
||||
|
||||
td->td_pflags &= ~TDP_SIGFASTBLOCK;
|
||||
td->td_sigblock_ptr = NULL;
|
||||
td->td_sigblock_val = 0;
|
||||
sigfastblock_clear(td);
|
||||
|
||||
/* May be called with Giant held */
|
||||
EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);
|
||||
|
@ -157,6 +157,12 @@ static int kern_lognosys = 0;
|
||||
SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0,
|
||||
"Log invalid syscalls");
|
||||
|
||||
__read_frequently bool sigfastblock_fetch_always = false;
|
||||
SYSCTL_BOOL(_kern, OID_AUTO, sigfastblock_fetch_always, CTLFLAG_RWTUN,
|
||||
&sigfastblock_fetch_always, 0,
|
||||
"Fetch sigfastblock word on each syscall entry for proper "
|
||||
"blocking semantic");
|
||||
|
||||
SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
|
||||
|
||||
/*
|
||||
@ -2005,6 +2011,7 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
|
||||
code = ksi->ksi_code;
|
||||
KASSERT(_SIG_VALID(sig), ("invalid signal"));
|
||||
|
||||
sigfastblock_fetch(td);
|
||||
PROC_LOCK(p);
|
||||
ps = p->p_sigacts;
|
||||
mtx_lock(&ps->ps_mtx);
|
||||
@ -3952,6 +3959,42 @@ sig_drop_caught(struct proc *p)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sigfastblock_failed(struct thread *td, bool sendsig, bool write)
|
||||
{
|
||||
ksiginfo_t ksi;
|
||||
|
||||
/*
|
||||
* Prevent further fetches and SIGSEGVs, allowing thread to
|
||||
* issue syscalls despite corruption.
|
||||
*/
|
||||
sigfastblock_clear(td);
|
||||
|
||||
if (!sendsig)
|
||||
return;
|
||||
ksiginfo_init_trap(&ksi);
|
||||
ksi.ksi_signo = SIGSEGV;
|
||||
ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
|
||||
ksi.ksi_addr = td->td_sigblock_ptr;
|
||||
trapsignal(td, &ksi);
|
||||
}
|
||||
|
||||
static bool
|
||||
sigfastblock_fetch_sig(struct thread *td, bool sendsig, uint32_t *valp)
|
||||
{
|
||||
uint32_t res;
|
||||
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
|
||||
return (true);
|
||||
if (fueword32((void *)td->td_sigblock_ptr, &res) == -1) {
|
||||
sigfastblock_failed(td, sendsig, false);
|
||||
return (false);
|
||||
}
|
||||
*valp = res;
|
||||
td->td_sigblock_val = res & ~SIGFASTBLOCK_FLAGS;
|
||||
return (true);
|
||||
}
|
||||
|
||||
int
|
||||
sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
{
|
||||
@ -3960,6 +4003,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
uint32_t oldval;
|
||||
|
||||
error = 0;
|
||||
p = td->td_proc;
|
||||
switch (uap->cmd) {
|
||||
case SIGFASTBLOCK_SETPTR:
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
|
||||
@ -3975,18 +4019,22 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
break;
|
||||
|
||||
case SIGFASTBLOCK_UNBLOCK:
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
again:
|
||||
res = casueword32(td->td_sigblock_ptr, SIGFASTBLOCK_PEND,
|
||||
&oldval, 0);
|
||||
if (res == -1) {
|
||||
error = EFAULT;
|
||||
break;
|
||||
}
|
||||
if (res == 1) {
|
||||
|
||||
for (;;) {
|
||||
res = casueword32(td->td_sigblock_ptr,
|
||||
SIGFASTBLOCK_PEND, &oldval, 0);
|
||||
if (res == -1) {
|
||||
error = EFAULT;
|
||||
sigfastblock_failed(td, false, true);
|
||||
break;
|
||||
}
|
||||
if (res == 0)
|
||||
break;
|
||||
MPASS(res == 1);
|
||||
if (oldval != SIGFASTBLOCK_PEND) {
|
||||
error = EBUSY;
|
||||
break;
|
||||
@ -3994,8 +4042,22 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
error = thread_check_susp(td, false);
|
||||
if (error != 0)
|
||||
break;
|
||||
goto again;
|
||||
}
|
||||
if (error != 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* td_sigblock_val is cleared there, but not on a
|
||||
* syscall exit. The end effect is that a single
|
||||
* interruptible sleep, while user sigblock word is
|
||||
* set, might return EINTR or ERESTART to usermode
|
||||
* without delivering signal. All further sleeps,
|
||||
* until userspace clears the word and does
|
||||
* sigfastblock(UNBLOCK), observe current word and no
|
||||
* longer get interrupted. It is slight
|
||||
* non-conformance, with alternative to have read the
|
||||
* sigblock word on each syscall entry.
|
||||
*/
|
||||
td->td_sigblock_val = 0;
|
||||
|
||||
/*
|
||||
@ -4003,7 +4065,6 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
* signals to current thread. But notify others about
|
||||
* fake unblock.
|
||||
*/
|
||||
p = td->td_proc;
|
||||
if (error == 0 && p->p_numthreads != 1) {
|
||||
PROC_LOCK(p);
|
||||
reschedule_signals(p, td->td_sigmask, 0);
|
||||
@ -4016,8 +4077,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
res = fueword32(td->td_sigblock_ptr, &oldval);
|
||||
if (res == -1) {
|
||||
if (!sigfastblock_fetch_sig(td, false, &oldval)) {
|
||||
error = EFAULT;
|
||||
break;
|
||||
}
|
||||
@ -4025,8 +4085,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
error = EBUSY;
|
||||
break;
|
||||
}
|
||||
td->td_pflags &= ~TDP_SIGFASTBLOCK;
|
||||
td->td_sigblock_val = 0;
|
||||
sigfastblock_clear(td);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -4037,32 +4096,59 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
|
||||
}
|
||||
|
||||
void
|
||||
fetch_sigfastblock(struct thread *td)
|
||||
sigfastblock_clear(struct thread *td)
|
||||
{
|
||||
struct proc *p;
|
||||
bool resched;
|
||||
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
|
||||
return;
|
||||
if (fueword32(td->td_sigblock_ptr, &td->td_sigblock_val) == -1) {
|
||||
fetch_sigfastblock_failed(td, false);
|
||||
return;
|
||||
td->td_sigblock_val = 0;
|
||||
resched = (td->td_pflags & TDP_SIGFASTPENDING) != 0;
|
||||
td->td_pflags &= ~(TDP_SIGFASTBLOCK | TDP_SIGFASTPENDING);
|
||||
if (resched) {
|
||||
p = td->td_proc;
|
||||
PROC_LOCK(p);
|
||||
reschedule_signals(p, td->td_sigmask, 0);
|
||||
PROC_UNLOCK(p);
|
||||
}
|
||||
td->td_sigblock_val &= ~SIGFASTBLOCK_FLAGS;
|
||||
}
|
||||
|
||||
void
|
||||
fetch_sigfastblock_failed(struct thread *td, bool write)
|
||||
sigfastblock_fetch(struct thread *td)
|
||||
{
|
||||
ksiginfo_t ksi;
|
||||
uint32_t val;
|
||||
|
||||
/*
|
||||
* Prevent further fetches and SIGSEGVs, allowing thread to
|
||||
* issue syscalls despite corruption.
|
||||
*/
|
||||
td->td_pflags &= ~TDP_SIGFASTBLOCK;
|
||||
|
||||
ksiginfo_init_trap(&ksi);
|
||||
ksi.ksi_signo = SIGSEGV;
|
||||
ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
|
||||
ksi.ksi_addr = td->td_sigblock_ptr;
|
||||
trapsignal(td, &ksi);
|
||||
(void)sigfastblock_fetch_sig(td, true, &val);
|
||||
}
|
||||
|
||||
void
|
||||
sigfastblock_setpend(struct thread *td)
|
||||
{
|
||||
int res;
|
||||
uint32_t oldval;
|
||||
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
|
||||
return;
|
||||
res = fueword32((void *)td->td_sigblock_ptr, &oldval);
|
||||
if (res == -1) {
|
||||
sigfastblock_failed(td, true, false);
|
||||
return;
|
||||
}
|
||||
for (;;) {
|
||||
res = casueword32(td->td_sigblock_ptr, oldval, &oldval,
|
||||
oldval | SIGFASTBLOCK_PEND);
|
||||
if (res == -1) {
|
||||
sigfastblock_failed(td, true, true);
|
||||
return;
|
||||
}
|
||||
if (res == 0) {
|
||||
td->td_sigblock_val = oldval & ~SIGFASTBLOCK_FLAGS;
|
||||
td->td_pflags &= ~TDP_SIGFASTPENDING;
|
||||
break;
|
||||
}
|
||||
MPASS(res == 1);
|
||||
if (thread_check_susp(td, false) != 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -132,11 +132,11 @@ syscallenter(struct thread *td)
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch fast sigblock value at the time of syscall
|
||||
* entry because sleepqueue primitives might call
|
||||
* cursig().
|
||||
* Fetch fast sigblock value at the time of syscall entry to
|
||||
* handle sleepqueue primitives which might call cursig().
|
||||
*/
|
||||
fetch_sigfastblock(td);
|
||||
if (__predict_false(sigfastblock_fetch_always))
|
||||
(void)sigfastblock_fetch(td);
|
||||
|
||||
/* Let system calls set td_errno directly. */
|
||||
td->td_pflags &= ~TDP_NERRNO;
|
||||
|
@ -134,6 +134,7 @@ userret(struct thread *td, struct trapframe *frame)
|
||||
#ifdef KTRACE
|
||||
KTRUSERRET(td);
|
||||
#endif
|
||||
|
||||
td_softdep_cleanup(td);
|
||||
MPASS(td->td_su == NULL);
|
||||
|
||||
@ -222,8 +223,7 @@ ast(struct trapframe *framep)
|
||||
{
|
||||
struct thread *td;
|
||||
struct proc *p;
|
||||
uint32_t oldval;
|
||||
int flags, sig, res;
|
||||
int flags, sig;
|
||||
|
||||
td = curthread;
|
||||
p = td->td_proc;
|
||||
@ -325,11 +325,12 @@ ast(struct trapframe *framep)
|
||||
*/
|
||||
if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
|
||||
!SIGISEMPTY(p->p_siglist)) {
|
||||
fetch_sigfastblock(td);
|
||||
sigfastblock_fetch(td);
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&p->p_sigacts->ps_mtx);
|
||||
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 &&
|
||||
td->td_sigblock_val != 0) {
|
||||
sigfastblock_setpend(td);
|
||||
reschedule_signals(p, fastblock_mask,
|
||||
SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK);
|
||||
} else {
|
||||
@ -346,32 +347,8 @@ ast(struct trapframe *framep)
|
||||
* Handle deferred update of the fast sigblock value, after
|
||||
* the postsig() loop was performed.
|
||||
*/
|
||||
if (td->td_pflags & TDP_SIGFASTPENDING) {
|
||||
td->td_pflags &= ~TDP_SIGFASTPENDING;
|
||||
res = fueword32(td->td_sigblock_ptr, &oldval);
|
||||
if (res == -1) {
|
||||
fetch_sigfastblock_failed(td, false);
|
||||
} else {
|
||||
for (;;) {
|
||||
oldval |= SIGFASTBLOCK_PEND;
|
||||
res = casueword32(td->td_sigblock_ptr, oldval,
|
||||
&oldval, oldval | SIGFASTBLOCK_PEND);
|
||||
if (res == -1) {
|
||||
fetch_sigfastblock_failed(td, true);
|
||||
break;
|
||||
}
|
||||
if (res == 0) {
|
||||
td->td_sigblock_val = oldval &
|
||||
~SIGFASTBLOCK_FLAGS;
|
||||
break;
|
||||
}
|
||||
MPASS(res == 1);
|
||||
res = thread_check_susp(td, false);
|
||||
if (res != 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (td->td_pflags & TDP_SIGFASTPENDING)
|
||||
sigfastblock_setpend(td);
|
||||
|
||||
/*
|
||||
* We need to check to see if we have to exit or wait due to a
|
||||
|
@ -273,6 +273,7 @@ int __sys_sigfastblock(int cmd, void *ptr);
|
||||
|
||||
#ifdef _KERNEL
|
||||
extern sigset_t fastblock_mask;
|
||||
extern bool sigfastblock_fetch_always;
|
||||
|
||||
/* Return nonzero if process p has an unmasked pending signal. */
|
||||
#define SIGPENDING(td) \
|
||||
@ -382,8 +383,6 @@ sigallowstop(int prev)
|
||||
|
||||
int cursig(struct thread *td);
|
||||
void execsigs(struct proc *p);
|
||||
void fetch_sigfastblock(struct thread *td);
|
||||
void fetch_sigfastblock_failed(struct thread *td, bool write);
|
||||
void gsignal(int pgid, int sig, ksiginfo_t *ksi);
|
||||
void killproc(struct proc *p, char *why);
|
||||
ksiginfo_t * ksiginfo_alloc(int wait);
|
||||
@ -405,6 +404,9 @@ void sig_drop_caught(struct proc *p);
|
||||
void sigexit(struct thread *td, int sig) __dead2;
|
||||
int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
|
||||
int sig_ffs(sigset_t *set);
|
||||
void sigfastblock_clear(struct thread *td);
|
||||
void sigfastblock_fetch(struct thread *td);
|
||||
void sigfastblock_setpend(struct thread *td);
|
||||
void siginit(struct proc *p);
|
||||
void signotify(struct thread *td);
|
||||
void sigqueue_delete(struct sigqueue *queue, int sig);
|
||||
|
Loading…
Reference in New Issue
Block a user