Return correct error code to user-space when a system call receives a signal in the LinuxKPI.

The read(), write() and mmap() system calls can return either EINTR or
ERESTART upon receiving a signal. Add code to figure out the correct
return value by temporarily storing the return code from the relevant
FreeBSD kernel APIs in the Linux task structure.

MFC after:	3 days
Sponsored by:	Mellanox Technologies
This commit is contained in:
hselasky 2018-02-22 15:29:19 +00:00
parent 2858f9ff6e
commit fdcd80b449
6 changed files with 132 additions and 47 deletions

View File

@ -63,7 +63,7 @@ typedef struct mutex {
#define mutex_lock_interruptible(_m) ({ \
MUTEX_SKIP() ? 0 : \
(sx_xlock_sig(&(_m)->sx) ? -EINTR : 0); \
linux_mutex_lock_interruptible(_m); \
})
#define mutex_unlock(_m) do { \
@ -143,4 +143,6 @@ linux_mutex_destroy(mutex_t *m)
sx_destroy(&m->sx);
}
extern int linux_mutex_lock_interruptible(mutex_t *m);
#endif /* _LINUX_MUTEX_H_ */

View File

@ -47,7 +47,7 @@ struct rw_semaphore {
#define up_read(_rw) sx_sunlock(&(_rw)->sx)
#define down_read_trylock(_rw) !!sx_try_slock(&(_rw)->sx)
#define down_write_trylock(_rw) !!sx_try_xlock(&(_rw)->sx)
#define down_write_killable(_rw) !!sx_xlock_sig(&(_rw)->sx)
#define down_write_killable(_rw) linux_down_write_killable(_rw)
#define downgrade_write(_rw) sx_downgrade(&(_rw)->sx)
#define down_read_nested(_rw, _sc) down_read(_rw)
#define init_rwsem(_rw) linux_init_rwsem(_rw, rwsem_name("lnxrwsem"))
@ -79,4 +79,6 @@ linux_init_rwsem(struct rw_semaphore *rw, const char *name)
sx_init_flags(&rw->sx, name, SX_NOWITNESS);
}
extern int linux_down_write_killable(struct rw_semaphore *);
#endif /* _LINUX_RWSEM_H_ */

View File

@ -2,7 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
* Copyright (c) 2013-2018 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -77,6 +77,7 @@ struct task_struct {
struct completion exited;
TAILQ_ENTRY(task_struct) rcu_entry;
int rcu_recurse;
int bsd_interrupt_value;
};
#define current ({ \
@ -127,12 +128,26 @@ void linux_send_sig(int signo, struct task_struct *task);
#define signal_pending_state(state, task) \
linux_signal_pending_state(state, task)
#define send_sig(signo, task, priv) do { \
CTASSERT(priv == 0); \
CTASSERT((priv) == 0); \
linux_send_sig(signo, task); \
} while (0)
int linux_schedule_timeout(int timeout);
/*
 * Remember the return code of an interrupted sleep or lock operation
 * in the given task structure.  The value is written to
 * task->bsd_interrupt_value, which linux_schedule_get_interrupt_value()
 * later reads and resets to zero.
 *
 * task:  task structure receiving the code
 * value: code to store; the callers visible here pass the negated
 *        return value of the FreeBSD sleep/lock primitive
 */
static inline void
linux_schedule_save_interrupt_value(struct task_struct *task, int value)
{
/* overwrite any previously stored code */
task->bsd_interrupt_value = value;
}
/*
 * Fetch the interrupt code previously stored in the task structure and
 * reset the stored value to zero, so that every saved code is consumed
 * at most once.
 *
 * Returns the stored code, or zero when nothing was stored.
 */
static inline int
linux_schedule_get_interrupt_value(struct task_struct *task)
{
	int saved;

	saved = task->bsd_interrupt_value;
	/* consume the value: the next reader sees zero */
	task->bsd_interrupt_value = 0;

	return (saved);
}
#define schedule() \
(void)linux_schedule_timeout(MAX_SCHEDULE_TIMEOUT)
#define schedule_timeout(timeout) \

View File

@ -2,7 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
* Copyright (c) 2013-2018 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -828,10 +828,27 @@ linux_access_ok(int rw, const void *uaddr, size_t len)
(eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}
/*
* This function should return either EINTR or ERESTART depending on
* the signal type sent to this thread:
*/
static int
linux_get_error(struct task_struct *task, int error)
{
/* check for signal type interrupt code */
if (error == EINTR || error == ERESTARTSYS || error == ERESTART) {
error = -linux_schedule_get_interrupt_value(task);
if (error == 0)
error = EINTR;
}
return (error);
}
static int
linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
u_long cmd, caddr_t data, struct thread *td)
{
struct task_struct *task = current;
unsigned size;
int error;
@ -844,8 +861,8 @@ linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
* Background: Linux code expects a user-space address
* while FreeBSD supplies a kernel-space address.
*/
current->bsd_ioctl_data = data;
current->bsd_ioctl_len = size;
task->bsd_ioctl_data = data;
task->bsd_ioctl_len = size;
data = (void *)LINUX_IOCTL_MIN_PTR;
} else {
/* fetch user-space pointer */
@ -869,16 +886,17 @@ linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
else
error = ENOTTY;
if (size > 0) {
current->bsd_ioctl_data = NULL;
current->bsd_ioctl_len = 0;
task->bsd_ioctl_data = NULL;
task->bsd_ioctl_len = 0;
}
if (error == EWOULDBLOCK) {
/* update kqfilter status, if any */
linux_file_kqfilter_poll(filp,
LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
} else if (error == ERESTARTSYS)
error = ERESTART;
} else {
error = linux_get_error(task, error);
}
return (error);
}
@ -1111,6 +1129,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
vm_size_t size, struct vm_object **object, int nprot,
struct thread *td)
{
struct task_struct *task;
struct vm_area_struct *vmap;
struct mm_struct *mm;
struct linux_file *filp;
@ -1132,7 +1151,8 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
* The atomic reference below makes sure the mm_struct is
* available as long as the vmap is in the linux_vma_head.
*/
mm = current->mm;
task = current;
mm = task->mm;
if (atomic_inc_not_zero(&mm->mm_users) == 0)
return (EINVAL);
@ -1147,11 +1167,10 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
vmap->vm_mm = mm;
if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
error = EINTR;
error = linux_get_error(task, EINTR);
} else {
error = -OPW(fp, td, filp->f_op->mmap(filp, vmap));
if (error == ERESTARTSYS)
error = ERESTART;
error = linux_get_error(task, error);
up_write(&vmap->vm_mm->mmap_sem);
}
@ -1290,9 +1309,7 @@ linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
uio->uio_iov->iov_len -= bytes;
uio->uio_resid -= bytes;
} else {
error = -bytes;
if (error == ERESTARTSYS)
error = ERESTART;
error = linux_get_error(current, -bytes);
}
} else
error = ENXIO;
@ -1329,9 +1346,7 @@ linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred,
uio->uio_iov->iov_len -= bytes;
uio->uio_resid -= bytes;
} else {
error = -bytes;
if (error == ERESTARTSYS)
error = ERESTART;
error = linux_get_error(current, -bytes);
}
} else
error = ENXIO;
@ -1780,6 +1795,7 @@ linux_complete_common(struct completion *c, int all)
int
linux_wait_for_common(struct completion *c, int flags)
{
struct task_struct *task;
int error;
if (SCHEDULER_STOPPED())
@ -1787,6 +1803,8 @@ linux_wait_for_common(struct completion *c, int flags)
DROP_GIANT();
task = current;
if (flags != 0)
flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
else
@ -1798,7 +1816,9 @@ linux_wait_for_common(struct completion *c, int flags)
break;
sleepq_add(c, NULL, "completion", flags, 0);
if (flags & SLEEPQ_INTERRUPTIBLE) {
if (sleepq_wait_sig(c, 0) != 0) {
error = -sleepq_wait_sig(c, 0);
if (error != 0) {
linux_schedule_save_interrupt_value(task, error);
error = -ERESTARTSYS;
goto intr;
}
@ -1820,22 +1840,22 @@ linux_wait_for_common(struct completion *c, int flags)
int
linux_wait_for_timeout_common(struct completion *c, int timeout, int flags)
{
struct task_struct *task;
int end = jiffies + timeout;
int error;
int ret;
if (SCHEDULER_STOPPED())
return (0);
DROP_GIANT();
task = current;
if (flags != 0)
flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
else
flags = SLEEPQ_SLEEP;
error = 0;
ret = 0;
for (;;) {
sleepq_lock(c);
if (c->done)
@ -1843,26 +1863,30 @@ linux_wait_for_timeout_common(struct completion *c, int timeout, int flags)
sleepq_add(c, NULL, "completion", flags, 0);
sleepq_set_timeout(c, linux_timer_jiffies_until(end));
if (flags & SLEEPQ_INTERRUPTIBLE)
ret = sleepq_timedwait_sig(c, 0);
error = -sleepq_timedwait_sig(c, 0);
else
ret = sleepq_timedwait(c, 0);
if (ret != 0) {
/* check for timeout or signal */
if (ret == EWOULDBLOCK)
error = 0;
else
error = -sleepq_timedwait(c, 0);
if (error != 0) {
/* check for timeout */
if (error == -EWOULDBLOCK) {
error = 0; /* timeout */
} else {
/* signal happened */
linux_schedule_save_interrupt_value(task, error);
error = -ERESTARTSYS;
goto intr;
}
goto done;
}
}
c->done--;
sleepq_release(c);
intr:
/* return how many jiffies are left */
error = linux_timer_jiffies_until(end);
done:
PICKUP_GIANT();
/* return how many jiffies are left */
return (ret != 0 ? error : linux_timer_jiffies_until(end));
return (error);
}
int

View File

@ -28,6 +28,7 @@
#include <sys/queue.h>
#include <linux/sched.h>
#include <linux/ww_mutex.h>
struct ww_mutex_thread {
@ -72,10 +73,13 @@ linux_ww_unlock(void)
int
linux_ww_mutex_lock_sub(struct ww_mutex *lock, int catch_signal)
{
struct task_struct *task;
struct ww_mutex_thread entry;
struct ww_mutex_thread *other;
int retval = 0;
task = current;
linux_ww_lock();
if (unlikely(sx_try_xlock(&lock->base.sx) == 0)) {
entry.thread = curthread;
@ -105,7 +109,9 @@ linux_ww_mutex_lock_sub(struct ww_mutex *lock, int catch_signal)
}
}
if (catch_signal) {
if (cv_wait_sig(&lock->condvar, &ww_mutex_global) != 0) {
retval = -cv_wait_sig(&lock->condvar, &ww_mutex_global);
if (retval != 0) {
linux_schedule_save_interrupt_value(task, retval);
retval = -EINTR;
goto done;
}
@ -134,3 +140,29 @@ linux_ww_mutex_unlock_sub(struct ww_mutex *lock)
cv_signal(&lock->condvar);
linux_ww_unlock();
}
/*
 * Acquire the sx lock backing a LinuxKPI mutex, allowing the wait to
 * be interrupted.
 *
 * Returns zero when the lock was acquired.  When sx_xlock_sig() fails,
 * its negated return value is saved in the current task structure and
 * -EINTR is returned.
 */
int
linux_mutex_lock_interruptible(mutex_t *m)
{
	int bsd_error;

	bsd_error = sx_xlock_sig(&m->sx);
	if (bsd_error == 0)
		return (0);

	/* keep the FreeBSD code so it can be retrieved later */
	linux_schedule_save_interrupt_value(current, -bsd_error);

	return (-EINTR);
}
/*
 * Take the sx lock backing a LinuxKPI read-write semaphore exclusively,
 * allowing the wait to be interrupted.
 *
 * Returns zero when the lock was acquired.  When sx_xlock_sig() fails,
 * its negated return value is saved in the current task structure and
 * -EINTR is returned.
 */
int
linux_down_write_killable(struct rw_semaphore *rw)
{
	int bsd_error;

	bsd_error = sx_xlock_sig(&rw->sx);
	if (bsd_error == 0)
		return (0);

	/* keep the FreeBSD code so it can be retrieved later */
	linux_schedule_save_interrupt_value(current, -bsd_error);

	return (-EINTR);
}

View File

@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$");
#include <linux/wait.h>
static int
linux_add_to_sleepqueue(void *wchan, const char *wmesg, int timeout, int state)
linux_add_to_sleepqueue(void *wchan, struct task_struct *task,
const char *wmesg, int timeout, int state)
{
int flags, ret;
@ -66,8 +67,10 @@ linux_add_to_sleepqueue(void *wchan, const char *wmesg, int timeout, int state)
ret = -sleepq_timedwait(wchan, 0);
}
/* filter return value */
if (ret != 0 && ret != -EWOULDBLOCK)
if (ret != 0 && ret != -EWOULDBLOCK) {
linux_schedule_save_interrupt_value(task, ret);
ret = -ERESTARTSYS;
}
return (ret);
}
@ -235,10 +238,10 @@ linux_wait_event_common(wait_queue_head_t *wqh, wait_queue_t *wq, int timeout,
PHOLD(task->task_thread->td_proc);
sleepq_lock(task);
if (atomic_read(&task->state) != TASK_WAKING) {
ret = linux_add_to_sleepqueue(task, "wevent", timeout, state);
ret = linux_add_to_sleepqueue(task, task, "wevent", timeout, state);
} else {
sleepq_release(task);
ret = linux_signal_pending_state(state, task) ? -ERESTARTSYS : 0;
ret = 0;
}
PRELE(task->task_thread->td_proc);
@ -253,6 +256,7 @@ int
linux_schedule_timeout(int timeout)
{
struct task_struct *task;
int ret;
int state;
int remainder;
@ -270,10 +274,12 @@ linux_schedule_timeout(int timeout)
sleepq_lock(task);
state = atomic_read(&task->state);
if (state != TASK_WAKING)
(void)linux_add_to_sleepqueue(task, "sched", timeout, state);
else
if (state != TASK_WAKING) {
ret = linux_add_to_sleepqueue(task, task, "sched", timeout, state);
} else {
sleepq_release(task);
ret = 0;
}
set_task_state(task, TASK_RUNNING);
PICKUP_GIANT();
@ -283,7 +289,11 @@ linux_schedule_timeout(int timeout)
/* range check return value */
remainder -= ticks;
if (remainder < 0)
/* range check return value */
if (ret == -ERESTARTSYS && remainder < 1)
remainder = 1;
else if (remainder < 0)
remainder = 0;
else if (remainder > timeout)
remainder = timeout;
@ -337,7 +347,7 @@ linux_wait_on_bit_timeout(unsigned long *word, int bit, unsigned int state,
break;
}
set_task_state(task, state);
ret = linux_add_to_sleepqueue(wchan, "wbit", timeout, state);
ret = linux_add_to_sleepqueue(wchan, task, "wbit", timeout, state);
if (ret != 0)
break;
}
@ -374,7 +384,7 @@ linux_wait_on_atomic_t(atomic_t *a, unsigned int state)
break;
}
set_task_state(task, state);
ret = linux_add_to_sleepqueue(wchan, "watomic", 0, state);
ret = linux_add_to_sleepqueue(wchan, task, "watomic", 0, state);
if (ret != 0)
break;
}