Do not read sigfastblock word on syscall entry.

On machines with SMAP, fueword executes two serializing instructions
which can be seen in microbenchmarks.

As a measure to restore microbenchmark numbers, only read the word on
the attempt to deliver signal in ast().  If the word is set, signal is
not delivered and word is kept, preventing interruption of
interruptible sleeps by signals until userspace calls
sigfastblock(UNBLOCK) which clears the word.

This way, the spurious EINTR that userspace can see while in critical
section is on first interruptible sleep, if a signal is pending, and
on signal posting.  It is believed that it is not important for rtld
and lbithr critical sections.  It might be visible for the application
code e.g. for the callback of dl_iterate_phdr(3), but again the belief
is that the non-compliance is acceptable.  Most important is that the
retry of the sleeping syscall does not interrupt unless additional
signal is posted.

For now I added the knob kern.sigfastblock_fetch_always to enable the
word read on syscall entry to be able to diagnose possible issues due
to spurious EINTR.

While there, do some code restructuting to have all sigfastblock()
handling located in kern_sig.c.

Reviewed by:	jeff
Discussed with:	mjg
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D23622
This commit is contained in:
Konstantin Belousov 2020-02-20 15:34:02 +00:00
parent a1a6c01e41
commit a113b17f10
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=358168
5 changed files with 134 additions and 71 deletions

View File

@ -1035,9 +1035,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
imgp->vmspace_destroyed = 1;
imgp->sysent = sv;
td->td_pflags &= ~TDP_SIGFASTBLOCK;
td->td_sigblock_ptr = NULL;
td->td_sigblock_val = 0;
sigfastblock_clear(td);
/* May be called with Giant held */
EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

View File

@ -157,6 +157,12 @@ static int kern_lognosys = 0;
SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0,
"Log invalid syscalls");
__read_frequently bool sigfastblock_fetch_always = false;
SYSCTL_BOOL(_kern, OID_AUTO, sigfastblock_fetch_always, CTLFLAG_RWTUN,
&sigfastblock_fetch_always, 0,
"Fetch sigfastblock word on each syscall entry for proper "
"blocking semantic");
SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
/*
@ -2005,6 +2011,7 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
code = ksi->ksi_code;
KASSERT(_SIG_VALID(sig), ("invalid signal"));
sigfastblock_fetch(td);
PROC_LOCK(p);
ps = p->p_sigacts;
mtx_lock(&ps->ps_mtx);
@ -3952,6 +3959,42 @@ sig_drop_caught(struct proc *p)
}
}
static void
sigfastblock_failed(struct thread *td, bool sendsig, bool write)
{
ksiginfo_t ksi;
/*
* Prevent further fetches and SIGSEGVs, allowing thread to
* issue syscalls despite corruption.
*/
sigfastblock_clear(td);
if (!sendsig)
return;
ksiginfo_init_trap(&ksi);
ksi.ksi_signo = SIGSEGV;
ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
ksi.ksi_addr = td->td_sigblock_ptr;
trapsignal(td, &ksi);
}
static bool
sigfastblock_fetch_sig(struct thread *td, bool sendsig, uint32_t *valp)
{
uint32_t res;
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
return (true);
if (fueword32((void *)td->td_sigblock_ptr, &res) == -1) {
sigfastblock_failed(td, sendsig, false);
return (false);
}
*valp = res;
td->td_sigblock_val = res & ~SIGFASTBLOCK_FLAGS;
return (true);
}
int
sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
{
@ -3960,6 +4003,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
uint32_t oldval;
error = 0;
p = td->td_proc;
switch (uap->cmd) {
case SIGFASTBLOCK_SETPTR:
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
@ -3975,18 +4019,22 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
break;
case SIGFASTBLOCK_UNBLOCK:
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) {
error = EINVAL;
break;
}
again:
res = casueword32(td->td_sigblock_ptr, SIGFASTBLOCK_PEND,
&oldval, 0);
if (res == -1) {
error = EFAULT;
break;
}
if (res == 1) {
for (;;) {
res = casueword32(td->td_sigblock_ptr,
SIGFASTBLOCK_PEND, &oldval, 0);
if (res == -1) {
error = EFAULT;
sigfastblock_failed(td, false, true);
break;
}
if (res == 0)
break;
MPASS(res == 1);
if (oldval != SIGFASTBLOCK_PEND) {
error = EBUSY;
break;
@ -3994,8 +4042,22 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
error = thread_check_susp(td, false);
if (error != 0)
break;
goto again;
}
if (error != 0)
break;
/*
* td_sigblock_val is cleared there, but not on a
* syscall exit. The end effect is that a single
* interruptible sleep, while user sigblock word is
* set, might return EINTR or ERESTART to usermode
* without delivering signal. All further sleeps,
* until userspace clears the word and does
* sigfastblock(UNBLOCK), observe current word and no
* longer get interrupted. It is slight
* non-conformance, with alternative to have read the
* sigblock word on each syscall entry.
*/
td->td_sigblock_val = 0;
/*
@ -4003,7 +4065,6 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
* signals to current thread. But notify others about
* fake unblock.
*/
p = td->td_proc;
if (error == 0 && p->p_numthreads != 1) {
PROC_LOCK(p);
reschedule_signals(p, td->td_sigmask, 0);
@ -4016,8 +4077,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
error = EINVAL;
break;
}
res = fueword32(td->td_sigblock_ptr, &oldval);
if (res == -1) {
if (!sigfastblock_fetch_sig(td, false, &oldval)) {
error = EFAULT;
break;
}
@ -4025,8 +4085,7 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
error = EBUSY;
break;
}
td->td_pflags &= ~TDP_SIGFASTBLOCK;
td->td_sigblock_val = 0;
sigfastblock_clear(td);
break;
default:
@ -4037,32 +4096,59 @@ sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
}
void
fetch_sigfastblock(struct thread *td)
sigfastblock_clear(struct thread *td)
{
struct proc *p;
bool resched;
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
return;
if (fueword32(td->td_sigblock_ptr, &td->td_sigblock_val) == -1) {
fetch_sigfastblock_failed(td, false);
return;
td->td_sigblock_val = 0;
resched = (td->td_pflags & TDP_SIGFASTPENDING) != 0;
td->td_pflags &= ~(TDP_SIGFASTBLOCK | TDP_SIGFASTPENDING);
if (resched) {
p = td->td_proc;
PROC_LOCK(p);
reschedule_signals(p, td->td_sigmask, 0);
PROC_UNLOCK(p);
}
td->td_sigblock_val &= ~SIGFASTBLOCK_FLAGS;
}
void
fetch_sigfastblock_failed(struct thread *td, bool write)
sigfastblock_fetch(struct thread *td)
{
ksiginfo_t ksi;
uint32_t val;
/*
* Prevent further fetches and SIGSEGVs, allowing thread to
* issue syscalls despite corruption.
*/
td->td_pflags &= ~TDP_SIGFASTBLOCK;
ksiginfo_init_trap(&ksi);
ksi.ksi_signo = SIGSEGV;
ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
ksi.ksi_addr = td->td_sigblock_ptr;
trapsignal(td, &ksi);
(void)sigfastblock_fetch_sig(td, true, &val);
}
void
sigfastblock_setpend(struct thread *td)
{
int res;
uint32_t oldval;
if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
return;
res = fueword32((void *)td->td_sigblock_ptr, &oldval);
if (res == -1) {
sigfastblock_failed(td, true, false);
return;
}
for (;;) {
res = casueword32(td->td_sigblock_ptr, oldval, &oldval,
oldval | SIGFASTBLOCK_PEND);
if (res == -1) {
sigfastblock_failed(td, true, true);
return;
}
if (res == 0) {
td->td_sigblock_val = oldval & ~SIGFASTBLOCK_FLAGS;
td->td_pflags &= ~TDP_SIGFASTPENDING;
break;
}
MPASS(res == 1);
if (thread_check_susp(td, false) != 0)
break;
}
}

View File

@ -132,11 +132,11 @@ syscallenter(struct thread *td)
}
/*
* Fetch fast sigblock value at the time of syscall
* entry because sleepqueue primitives might call
* cursig().
* Fetch fast sigblock value at the time of syscall entry to
* handle sleepqueue primitives which might call cursig().
*/
fetch_sigfastblock(td);
if (__predict_false(sigfastblock_fetch_always))
(void)sigfastblock_fetch(td);
/* Let system calls set td_errno directly. */
td->td_pflags &= ~TDP_NERRNO;

View File

@ -134,6 +134,7 @@ userret(struct thread *td, struct trapframe *frame)
#ifdef KTRACE
KTRUSERRET(td);
#endif
td_softdep_cleanup(td);
MPASS(td->td_su == NULL);
@ -222,8 +223,7 @@ ast(struct trapframe *framep)
{
struct thread *td;
struct proc *p;
uint32_t oldval;
int flags, sig, res;
int flags, sig;
td = curthread;
p = td->td_proc;
@ -325,11 +325,12 @@ ast(struct trapframe *framep)
*/
if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
!SIGISEMPTY(p->p_siglist)) {
fetch_sigfastblock(td);
sigfastblock_fetch(td);
PROC_LOCK(p);
mtx_lock(&p->p_sigacts->ps_mtx);
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 &&
td->td_sigblock_val != 0) {
sigfastblock_setpend(td);
reschedule_signals(p, fastblock_mask,
SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK);
} else {
@ -346,32 +347,8 @@ ast(struct trapframe *framep)
* Handle deferred update of the fast sigblock value, after
* the postsig() loop was performed.
*/
if (td->td_pflags & TDP_SIGFASTPENDING) {
td->td_pflags &= ~TDP_SIGFASTPENDING;
res = fueword32(td->td_sigblock_ptr, &oldval);
if (res == -1) {
fetch_sigfastblock_failed(td, false);
} else {
for (;;) {
oldval |= SIGFASTBLOCK_PEND;
res = casueword32(td->td_sigblock_ptr, oldval,
&oldval, oldval | SIGFASTBLOCK_PEND);
if (res == -1) {
fetch_sigfastblock_failed(td, true);
break;
}
if (res == 0) {
td->td_sigblock_val = oldval &
~SIGFASTBLOCK_FLAGS;
break;
}
MPASS(res == 1);
res = thread_check_susp(td, false);
if (res != 0)
break;
}
}
}
if (td->td_pflags & TDP_SIGFASTPENDING)
sigfastblock_setpend(td);
/*
* We need to check to see if we have to exit or wait due to a

View File

@ -273,6 +273,7 @@ int __sys_sigfastblock(int cmd, void *ptr);
#ifdef _KERNEL
extern sigset_t fastblock_mask;
extern bool sigfastblock_fetch_always;
/* Return nonzero if process p has an unmasked pending signal. */
#define SIGPENDING(td) \
@ -382,8 +383,6 @@ sigallowstop(int prev)
int cursig(struct thread *td);
void execsigs(struct proc *p);
void fetch_sigfastblock(struct thread *td);
void fetch_sigfastblock_failed(struct thread *td, bool write);
void gsignal(int pgid, int sig, ksiginfo_t *ksi);
void killproc(struct proc *p, char *why);
ksiginfo_t * ksiginfo_alloc(int wait);
@ -405,6 +404,9 @@ void sig_drop_caught(struct proc *p);
void sigexit(struct thread *td, int sig) __dead2;
int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
int sig_ffs(sigset_t *set);
void sigfastblock_clear(struct thread *td);
void sigfastblock_fetch(struct thread *td);
void sigfastblock_setpend(struct thread *td);
void siginit(struct proc *p);
void signotify(struct thread *td);
void sigqueue_delete(struct sigqueue *queue, int sig);