d87c627c80
The set_robust_list system call requests the kernel to record the head of the list of robust futexes owned by the calling thread. The head argument is the list head to record. The get_robust_list system call should return the head of the robust list of the thread whose thread id is specified in the pid argument. The list head should be stored in the location pointed to by the head argument. In contrast, our implementation of the get_robust_list system call copies a fixed-size portion of the memory referenced by the list-head pointer that was recorded by set_robust_list to the location pointed to by the head argument, instead of storing the pointer itself. So, it is possible for a local attacker to read portions of kernel memory, which may result in a privilege escalation. Submitted by: mjg Security: SA-16:03.linux
1281 lines
37 KiB
C
1281 lines
37 KiB
C
/* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Emmanuel Dreyfus
|
|
* 4. The name of the author may not be used to endorse or promote
|
|
* products derived from this software without specific prior written
|
|
* permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
#if 0
|
|
__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
|
|
#endif
|
|
|
|
#include "opt_compat.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/imgact.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/ktr.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/priv.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/sched.h>
|
|
#include <sys/sdt.h>
|
|
#include <sys/sx.h>
|
|
#include <sys/umtx.h>
|
|
|
|
#ifdef COMPAT_LINUX32
|
|
#include <machine/../linux32/linux.h>
|
|
#include <machine/../linux32/linux32_proto.h>
|
|
#else
|
|
#include <machine/../linux/linux.h>
|
|
#include <machine/../linux/linux_proto.h>
|
|
#endif
|
|
#include <compat/linux/linux_dtrace.h>
|
|
#include <compat/linux/linux_emul.h>
|
|
#include <compat/linux/linux_futex.h>
|
|
#include <compat/linux/linux_timer.h>
|
|
#include <compat/linux/linux_util.h>
|
|
|
|
/* DTrace init */
|
|
LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
|
|
|
|
/**
|
|
* Futex part for the special DTrace module "locks".
|
|
*/
|
|
LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
|
|
LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
|
|
|
|
/**
|
|
* Per futex probes.
|
|
*/
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
|
|
|
|
/**
|
|
* DTrace probes in this module.
|
|
*/
|
|
LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
|
|
"struct waiting_proc *");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
|
|
"int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
|
|
"int");
|
|
LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
|
|
"uint32_t");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
|
|
"int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
|
|
"struct waiting_proc **", "struct futex **");
|
|
LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
|
|
"struct waiting_proc **", "int");
|
|
LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
|
|
"struct waiting_proc *", "uint32_t *", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
|
|
"struct waiting_proc *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
|
|
"uint32_t");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
|
|
"struct waiting_proc *", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
|
|
LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
|
|
"struct futex *", "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
|
|
"struct waiting_proc *", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
|
|
LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
|
|
"struct waiting_proc **", "int", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
|
|
"int", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
|
|
"int");
|
|
LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
|
|
LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
|
|
"struct linux_sys_futex_args *");
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
|
|
LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
|
|
"uint32_t", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
|
|
"uint32_t *", "uint32_t", "int", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
|
|
"uint32_t", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
|
|
"uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
|
|
LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
|
|
"uint32_t", "int");
|
|
LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
|
|
"int", "uint32_t", "uint32_t *", "uint32_t");
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
|
|
LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
|
|
"struct linux_set_robust_list_args *");
|
|
LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
|
|
LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
|
|
"struct linux_get_robust_list_args *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
|
|
"struct linux_emuldata *", "uint32_t *", "unsigned int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
|
|
LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
|
|
"struct linux_robust_list **", "struct linux_robust_list **",
|
|
"unsigned int *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
|
|
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
|
|
LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
|
|
"struct linux_emuldata *");
|
|
LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
|
|
LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
|
|
|
|
struct futex;
|
|
|
|
struct waiting_proc {
|
|
uint32_t wp_flags;
|
|
struct futex *wp_futex;
|
|
TAILQ_ENTRY(waiting_proc) wp_list;
|
|
};
|
|
|
|
struct futex {
|
|
struct sx f_lck;
|
|
uint32_t *f_uaddr; /* user-supplied value, for debug */
|
|
struct umtx_key f_key;
|
|
uint32_t f_refcount;
|
|
uint32_t f_bitset;
|
|
LIST_ENTRY(futex) f_list;
|
|
TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
|
|
};
|
|
|
|
struct futex_list futex_list;
|
|
|
|
/*
 * Per-futex locking helpers.  The lock is an sx(9) lock that is always
 * taken exclusively; creation and destruction also fire DTrace probes.
 */
#define	FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
#define	FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
#define	FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)

#define	FUTEX_INIT(f)		do {					\
	sx_init_flags(&(f)->f_lck, "ftlk", SX_DUPOK);			\
	LIN_SDT_PROBE1(futex, futex, create, &(f)->f_lck);		\
} while (0)

#define	FUTEX_DESTROY(f)	do {					\
	LIN_SDT_PROBE1(futex, futex, destroy, &(f)->f_lck);		\
	sx_destroy(&(f)->f_lck);					\
} while (0)
|
|
|
|
/*
 * Mutex protecting the futex list.  The lock/unlock wrappers are
 * object-like macros (used as "FUTEXES_LOCK;") and fire the "locks"
 * DTrace probes while the mutex is held.
 */
struct mtx futex_mtx;

#define	FUTEXES_LOCK		do {					\
	mtx_lock(&futex_mtx);						\
	LIN_SDT_PROBE1(locks, futex_mtx, locked, &futex_mtx);		\
} while (0)

#define	FUTEXES_UNLOCK		do {					\
	LIN_SDT_PROBE1(locks, futex_mtx, unlock, &futex_mtx);		\
	mtx_unlock(&futex_mtx);						\
} while (0)
|
|
|
|
/*
 * Flags accepted by futex_get() / futex_get0().
 */
#define	FUTEX_CREATE_WP		0x1	/* allocate and enqueue a waiting_proc */
#define	FUTEX_DONTCREATE	0x2	/* do not create the futex if it does not exist */
#define	FUTEX_DONTEXISTS	0x4	/* return EINVAL if the futex already exists */
#define	FUTEX_SHARED		0x8	/* shared futex */

/*
 * Bits stored in waiting_proc.wp_flags.
 *
 * FUTEX_WP_REQUEUED: the waiter was requeued, i.e. moved from the wp_list
 *	of the futex the thread went to sleep on to the wp_list of another
 *	futex.
 * FUTEX_WP_REMOVED: the waiter has been woken up and already removed from
 *	the futex wp_list, to prevent a double wakeup.
 */
#define	FUTEX_WP_REQUEUED	0x1
#define	FUTEX_WP_REMOVED	0x2
|
|
|
|
static void futex_put(struct futex *, struct waiting_proc *);
|
|
static int futex_get0(uint32_t *, struct futex **f, uint32_t);
|
|
static int futex_get(uint32_t *, struct waiting_proc **, struct futex **,
|
|
uint32_t);
|
|
static int futex_sleep(struct futex *, struct waiting_proc *, int);
|
|
static int futex_wake(struct futex *, int, uint32_t);
|
|
static int futex_requeue(struct futex *, int, struct futex *, int);
|
|
static int futex_wait(struct futex *, struct waiting_proc *, int,
|
|
uint32_t);
|
|
static int futex_atomic_op(struct thread *, int, uint32_t *);
|
|
static int handle_futex_death(struct linux_emuldata *, uint32_t *,
|
|
unsigned int);
|
|
static int fetch_robust_entry(struct linux_robust_list **,
|
|
struct linux_robust_list **, unsigned int *);
|
|
|
|
/* support.s */
|
|
int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
|
|
int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
|
|
int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
|
|
int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
|
|
int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
|
|
|
|
|
|
static void
|
|
futex_put(struct futex *f, struct waiting_proc *wp)
|
|
{
|
|
LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
|
|
|
|
FUTEX_ASSERT_LOCKED(f);
|
|
if (wp != NULL) {
|
|
if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
|
|
TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
|
|
free(wp, M_FUTEX_WP);
|
|
}
|
|
|
|
FUTEXES_LOCK;
|
|
if (--f->f_refcount == 0) {
|
|
LIST_REMOVE(f, f_list);
|
|
FUTEXES_UNLOCK;
|
|
FUTEX_UNLOCK(f);
|
|
|
|
LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
|
|
f->f_refcount, f->f_key.shared);
|
|
LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
|
|
"shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
|
|
umtx_key_release(&f->f_key);
|
|
FUTEX_DESTROY(f);
|
|
free(f, M_FUTEX);
|
|
|
|
LIN_SDT_PROBE0(futex, futex_put, return);
|
|
return;
|
|
}
|
|
|
|
LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
|
|
f->f_key.shared);
|
|
LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
|
|
f->f_uaddr, f->f_refcount, f->f_key.shared);
|
|
FUTEXES_UNLOCK;
|
|
FUTEX_UNLOCK(f);
|
|
|
|
LIN_SDT_PROBE0(futex, futex_put, return);
|
|
}
|
|
|
|
static int
|
|
futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
|
|
{
|
|
struct futex *f, *tmpf;
|
|
struct umtx_key key;
|
|
int error;
|
|
|
|
LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
|
|
|
|
*newf = tmpf = NULL;
|
|
|
|
error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
|
|
AUTO_SHARE : THREAD_SHARE, &key);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
|
|
LIN_SDT_PROBE1(futex, futex_get0, return, error);
|
|
return (error);
|
|
}
|
|
retry:
|
|
FUTEXES_LOCK;
|
|
LIST_FOREACH(f, &futex_list, f_list) {
|
|
if (umtx_key_match(&f->f_key, &key)) {
|
|
if (tmpf != NULL) {
|
|
FUTEX_UNLOCK(tmpf);
|
|
FUTEX_DESTROY(tmpf);
|
|
free(tmpf, M_FUTEX);
|
|
}
|
|
if (flags & FUTEX_DONTEXISTS) {
|
|
FUTEXES_UNLOCK;
|
|
umtx_key_release(&key);
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get0, return,
|
|
EINVAL);
|
|
return (EINVAL);
|
|
}
|
|
|
|
/*
|
|
* Increment refcount of the found futex to
|
|
* prevent it from deallocation before FUTEX_LOCK()
|
|
*/
|
|
++f->f_refcount;
|
|
FUTEXES_UNLOCK;
|
|
umtx_key_release(&key);
|
|
|
|
FUTEX_LOCK(f);
|
|
*newf = f;
|
|
LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
|
|
f->f_refcount, f->f_key.shared);
|
|
LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
|
|
uaddr, f->f_refcount, f->f_key.shared);
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get0, return, 0);
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
if (flags & FUTEX_DONTCREATE) {
|
|
FUTEXES_UNLOCK;
|
|
umtx_key_release(&key);
|
|
LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
|
|
LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get0, return, 0);
|
|
return (0);
|
|
}
|
|
|
|
if (tmpf == NULL) {
|
|
FUTEXES_UNLOCK;
|
|
tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
|
|
tmpf->f_uaddr = uaddr;
|
|
tmpf->f_key = key;
|
|
tmpf->f_refcount = 1;
|
|
tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
|
|
FUTEX_INIT(tmpf);
|
|
TAILQ_INIT(&tmpf->f_waiting_proc);
|
|
|
|
/*
|
|
* Lock the new futex before an insert into the futex_list
|
|
* to prevent futex usage by other.
|
|
*/
|
|
FUTEX_LOCK(tmpf);
|
|
goto retry;
|
|
}
|
|
|
|
LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
|
|
FUTEXES_UNLOCK;
|
|
|
|
LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
|
|
tmpf->f_key.shared);
|
|
LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
|
|
uaddr, tmpf->f_refcount, tmpf->f_key.shared);
|
|
*newf = tmpf;
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get0, return, 0);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
|
|
uint32_t flags)
|
|
{
|
|
int error;
|
|
|
|
LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
|
|
|
|
if (flags & FUTEX_CREATE_WP) {
|
|
*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
|
|
(*wp)->wp_flags = 0;
|
|
}
|
|
error = futex_get0(uaddr, f, flags);
|
|
if (error) {
|
|
LIN_SDT_PROBE0(futex, futex_get, error);
|
|
|
|
if (flags & FUTEX_CREATE_WP)
|
|
free(*wp, M_FUTEX_WP);
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get, return, error);
|
|
return (error);
|
|
}
|
|
if (flags & FUTEX_CREATE_WP) {
|
|
TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
|
|
(*wp)->wp_futex = *f;
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, futex_get, return, error);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
|
|
{
|
|
int error;
|
|
|
|
FUTEX_ASSERT_LOCKED(f);
|
|
LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
|
|
LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
|
|
f->f_uaddr, wp, timeout, f->f_refcount);
|
|
error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
|
|
if (wp->wp_flags & FUTEX_WP_REQUEUED) {
|
|
KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
|
|
|
|
if (error) {
|
|
LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
|
|
f->f_uaddr, wp, wp->wp_futex->f_uaddr,
|
|
wp->wp_futex->f_refcount);
|
|
}
|
|
|
|
LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
|
|
" %p requeued uaddr %p ref %d",
|
|
error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
|
|
wp->wp_futex->f_refcount);
|
|
futex_put(f, NULL);
|
|
f = wp->wp_futex;
|
|
FUTEX_LOCK(f);
|
|
} else {
|
|
if (error) {
|
|
LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
|
|
f->f_uaddr, wp);
|
|
}
|
|
LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
|
|
error, f->f_uaddr, wp);
|
|
}
|
|
|
|
futex_put(f, wp);
|
|
|
|
LIN_SDT_PROBE1(futex, futex_sleep, return, error);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
futex_wake(struct futex *f, int n, uint32_t bitset)
|
|
{
|
|
struct waiting_proc *wp, *wpt;
|
|
int count = 0;
|
|
|
|
LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
|
|
|
|
if (bitset == 0) {
|
|
LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
|
|
return (EINVAL);
|
|
}
|
|
|
|
FUTEX_ASSERT_LOCKED(f);
|
|
TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
|
|
LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
|
|
f->f_refcount);
|
|
LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
|
|
f->f_uaddr, wp, f->f_refcount);
|
|
/*
|
|
* Unless we find a matching bit in
|
|
* the bitset, continue searching.
|
|
*/
|
|
if (!(wp->wp_futex->f_bitset & bitset))
|
|
continue;
|
|
|
|
wp->wp_flags |= FUTEX_WP_REMOVED;
|
|
TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
|
|
LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
|
|
wakeup_one(wp);
|
|
if (++count == n)
|
|
break;
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, futex_wake, return, count);
|
|
return (count);
|
|
}
|
|
|
|
static int
|
|
futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
|
|
{
|
|
struct waiting_proc *wp, *wpt;
|
|
int count = 0;
|
|
|
|
LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
|
|
|
|
FUTEX_ASSERT_LOCKED(f);
|
|
FUTEX_ASSERT_LOCKED(f2);
|
|
|
|
TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
|
|
if (++count <= n) {
|
|
LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
|
|
f->f_uaddr, wp);
|
|
wp->wp_flags |= FUTEX_WP_REMOVED;
|
|
TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
|
|
LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
|
|
wakeup_one(wp);
|
|
} else {
|
|
LIN_SDT_PROBE3(futex, futex_requeue, requeue,
|
|
f->f_uaddr, wp, f2->f_uaddr);
|
|
LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
|
|
f->f_uaddr, wp, f2->f_uaddr);
|
|
wp->wp_flags |= FUTEX_WP_REQUEUED;
|
|
/* Move wp to wp_list of f2 futex */
|
|
TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
|
|
TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
|
|
|
|
/*
|
|
* Thread which sleeps on wp after waking should
|
|
* acquire f2 lock, so increment refcount of f2 to
|
|
* prevent it from premature deallocation.
|
|
*/
|
|
wp->wp_futex = f2;
|
|
FUTEXES_LOCK;
|
|
++f2->f_refcount;
|
|
FUTEXES_UNLOCK;
|
|
if (count - n >= n2)
|
|
break;
|
|
}
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, futex_requeue, return, count);
|
|
return (count);
|
|
}
|
|
|
|
static int
|
|
futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
|
|
uint32_t bitset)
|
|
{
|
|
int error;
|
|
|
|
LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
|
|
|
|
if (bitset == 0) {
|
|
LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
|
|
return (EINVAL);
|
|
}
|
|
|
|
f->f_bitset = bitset;
|
|
error = futex_sleep(f, wp, timeout_hz);
|
|
if (error)
|
|
LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
|
|
if (error == EWOULDBLOCK)
|
|
error = ETIMEDOUT;
|
|
|
|
LIN_SDT_PROBE1(futex, futex_wait, return, error);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
|
|
{
|
|
int op = (encoded_op >> 28) & 7;
|
|
int cmp = (encoded_op >> 24) & 15;
|
|
int oparg = (encoded_op << 8) >> 20;
|
|
int cmparg = (encoded_op << 20) >> 20;
|
|
int oldval = 0, ret;
|
|
|
|
LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
|
|
|
|
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
|
|
oparg = 1 << oparg;
|
|
|
|
LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
|
|
cmparg);
|
|
|
|
/* XXX: Linux verifies access here and returns EFAULT */
|
|
LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
|
|
|
|
switch (op) {
|
|
case FUTEX_OP_SET:
|
|
ret = futex_xchgl(oparg, uaddr, &oldval);
|
|
break;
|
|
case FUTEX_OP_ADD:
|
|
ret = futex_addl(oparg, uaddr, &oldval);
|
|
break;
|
|
case FUTEX_OP_OR:
|
|
ret = futex_orl(oparg, uaddr, &oldval);
|
|
break;
|
|
case FUTEX_OP_ANDN:
|
|
ret = futex_andl(~oparg, uaddr, &oldval);
|
|
break;
|
|
case FUTEX_OP_XOR:
|
|
ret = futex_xorl(oparg, uaddr, &oldval);
|
|
break;
|
|
default:
|
|
LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
|
|
ret = -ENOSYS;
|
|
break;
|
|
}
|
|
|
|
if (ret) {
|
|
LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
|
|
return (ret);
|
|
}
|
|
|
|
switch (cmp) {
|
|
case FUTEX_OP_CMP_EQ:
|
|
ret = (oldval == cmparg);
|
|
break;
|
|
case FUTEX_OP_CMP_NE:
|
|
ret = (oldval != cmparg);
|
|
break;
|
|
case FUTEX_OP_CMP_LT:
|
|
ret = (oldval < cmparg);
|
|
break;
|
|
case FUTEX_OP_CMP_GE:
|
|
ret = (oldval >= cmparg);
|
|
break;
|
|
case FUTEX_OP_CMP_LE:
|
|
ret = (oldval <= cmparg);
|
|
break;
|
|
case FUTEX_OP_CMP_GT:
|
|
ret = (oldval > cmparg);
|
|
break;
|
|
default:
|
|
LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
|
|
ret = -ENOSYS;
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
|
|
return (ret);
|
|
}
|
|
|
|
int
|
|
linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
|
|
{
|
|
int clockrt, nrwake, op_ret, ret;
|
|
struct linux_pemuldata *pem;
|
|
struct waiting_proc *wp;
|
|
struct futex *f, *f2;
|
|
struct l_timespec ltimeout;
|
|
struct timespec timeout;
|
|
struct timeval utv, ctv;
|
|
int timeout_hz;
|
|
int error;
|
|
uint32_t flags, val;
|
|
|
|
LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
|
|
|
|
if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
|
|
flags = 0;
|
|
args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
|
|
} else
|
|
flags = FUTEX_SHARED;
|
|
|
|
/*
|
|
* Currently support for switching between CLOCK_MONOTONIC and
|
|
* CLOCK_REALTIME is not present. However Linux forbids the use of
|
|
* FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
|
|
* FUTEX_WAIT_REQUEUE_PI.
|
|
*/
|
|
clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
|
|
args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
|
|
if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
|
|
args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_clockswitch);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
}
|
|
|
|
error = 0;
|
|
f = f2 = NULL;
|
|
|
|
switch (args->op) {
|
|
case LINUX_FUTEX_WAIT:
|
|
args->val3 = FUTEX_BITSET_MATCH_ANY;
|
|
/* FALLTHROUGH */
|
|
|
|
case LINUX_FUTEX_WAIT_BITSET:
|
|
LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
|
|
args->val, args->val3);
|
|
LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
|
|
args->uaddr, args->val, args->val3);
|
|
|
|
if (args->timeout != NULL) {
|
|
error = copyin(args->timeout, <imeout, sizeof(ltimeout));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
|
|
error);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
error = linux_to_native_timespec(&timeout, <imeout);
|
|
if (error)
|
|
return (error);
|
|
TIMESPEC_TO_TIMEVAL(&utv, &timeout);
|
|
error = itimerfix(&utv);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
|
|
error);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
if (clockrt) {
|
|
microtime(&ctv);
|
|
timevalsub(&utv, &ctv);
|
|
} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
|
|
microuptime(&ctv);
|
|
timevalsub(&utv, &ctv);
|
|
}
|
|
if (utv.tv_sec < 0)
|
|
timevalclear(&utv);
|
|
timeout_hz = tvtohz(&utv);
|
|
} else
|
|
timeout_hz = 0;
|
|
|
|
error = futex_get(args->uaddr, &wp, &f,
|
|
flags | FUTEX_CREATE_WP);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
error = copyin(args->uaddr, &val, sizeof(val));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
|
|
error);
|
|
LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
|
|
error);
|
|
futex_put(f, wp);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
if (val != args->val) {
|
|
LIN_SDT_PROBE4(futex, linux_sys_futex,
|
|
debug_wait_value_neq, args->uaddr, args->val, val,
|
|
args->val3);
|
|
LINUX_CTR3(sys_futex,
|
|
"WAIT uaddr %p val 0x%x != uval 0x%x",
|
|
args->uaddr, args->val, val);
|
|
futex_put(f, wp);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return,
|
|
EWOULDBLOCK);
|
|
return (EWOULDBLOCK);
|
|
}
|
|
|
|
error = futex_wait(f, wp, timeout_hz, args->val3);
|
|
break;
|
|
|
|
case LINUX_FUTEX_WAKE:
|
|
args->val3 = FUTEX_BITSET_MATCH_ANY;
|
|
/* FALLTHROUGH */
|
|
|
|
case LINUX_FUTEX_WAKE_BITSET:
|
|
LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
|
|
args->val, args->val3);
|
|
LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
|
|
args->uaddr, args->val, args->val3);
|
|
|
|
error = futex_get(args->uaddr, NULL, &f,
|
|
flags | FUTEX_DONTCREATE);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
if (f == NULL) {
|
|
td->td_retval[0] = 0;
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
td->td_retval[0] = futex_wake(f, args->val, args->val3);
|
|
futex_put(f, NULL);
|
|
break;
|
|
|
|
case LINUX_FUTEX_CMP_REQUEUE:
|
|
LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
|
|
args->uaddr, args->val, args->val3, args->uaddr2,
|
|
args->timeout);
|
|
LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
|
|
"nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
|
|
args->uaddr, args->val, args->val3, args->uaddr2,
|
|
args->timeout);
|
|
|
|
/*
|
|
* Linux allows this, we would not, it is an incorrect
|
|
* usage of declared ABI, so return EINVAL.
|
|
*/
|
|
if (args->uaddr == args->uaddr2) {
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
invalid_cmp_requeue_use);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
|
|
return (EINVAL);
|
|
}
|
|
|
|
error = futex_get(args->uaddr, NULL, &f, flags);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* To avoid deadlocks return EINVAL if second futex
|
|
* exists at this time.
|
|
*
|
|
* Glibc fall back to FUTEX_WAKE in case of any error
|
|
* returned by FUTEX_CMP_REQUEUE.
|
|
*/
|
|
error = futex_get(args->uaddr2, NULL, &f2,
|
|
flags | FUTEX_DONTEXISTS);
|
|
if (error) {
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
error = copyin(args->uaddr, &val, sizeof(val));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
|
|
error);
|
|
LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
|
|
error);
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
if (val != args->val3) {
|
|
LIN_SDT_PROBE2(futex, linux_sys_futex,
|
|
debug_cmp_requeue_value_neq, args->val, val);
|
|
LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
|
|
args->val, val);
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
|
|
return (EAGAIN);
|
|
}
|
|
|
|
nrwake = (int)(unsigned long)args->timeout;
|
|
td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
break;
|
|
|
|
case LINUX_FUTEX_WAKE_OP:
|
|
LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
|
|
args->uaddr, args->op, args->val, args->uaddr2, args->val3);
|
|
LINUX_CTR5(sys_futex, "WAKE_OP "
|
|
"uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
|
|
args->uaddr, args->val, args->uaddr2, args->val3,
|
|
args->timeout);
|
|
|
|
error = futex_get(args->uaddr, NULL, &f, flags);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
if (args->uaddr != args->uaddr2)
|
|
error = futex_get(args->uaddr2, NULL, &f2, flags);
|
|
if (error) {
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* This function returns positive number as results and
|
|
* negative as errors
|
|
*/
|
|
op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
|
|
|
|
LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
|
|
args->uaddr, op_ret);
|
|
|
|
if (op_ret < 0) {
|
|
/* XXX: We don't handle the EFAULT yet. */
|
|
if (op_ret != -EFAULT) {
|
|
if (f2 != NULL)
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return,
|
|
-op_ret);
|
|
return (-op_ret);
|
|
} else {
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unhandled_efault);
|
|
}
|
|
if (f2 != NULL)
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
|
|
return (EFAULT);
|
|
}
|
|
|
|
ret = futex_wake(f, args->val, args->val3);
|
|
|
|
if (op_ret > 0) {
|
|
op_ret = 0;
|
|
nrwake = (int)(unsigned long)args->timeout;
|
|
|
|
if (f2 != NULL)
|
|
op_ret += futex_wake(f2, nrwake, args->val3);
|
|
else
|
|
op_ret += futex_wake(f, nrwake, args->val3);
|
|
ret += op_ret;
|
|
|
|
}
|
|
if (f2 != NULL)
|
|
futex_put(f2, NULL);
|
|
futex_put(f, NULL);
|
|
td->td_retval[0] = ret;
|
|
break;
|
|
|
|
case LINUX_FUTEX_LOCK_PI:
|
|
/* not yet implemented */
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_pi op\n");
|
|
pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_lock_pi);
|
|
}
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
|
|
case LINUX_FUTEX_UNLOCK_PI:
|
|
/* not yet implemented */
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_pi op\n");
|
|
pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_unlock_pi);
|
|
}
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
|
|
case LINUX_FUTEX_TRYLOCK_PI:
|
|
/* not yet implemented */
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_pi op\n");
|
|
pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_trylock_pi);
|
|
}
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
|
|
case LINUX_FUTEX_REQUEUE:
|
|
|
|
/*
|
|
* Glibc does not use this operation since version 2.3.3,
|
|
* as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
|
|
* Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
|
|
* FUTEX_REQUEUE returned EINVAL.
|
|
*/
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_requeue op\n");
|
|
pem->flags |= LINUX_XDEPR_REQUEUEOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
deprecated_requeue);
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
|
|
return (EINVAL);
|
|
|
|
case LINUX_FUTEX_WAIT_REQUEUE_PI:
|
|
/* not yet implemented */
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_pi op\n");
|
|
pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_wait_requeue_pi);
|
|
}
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
|
|
case LINUX_FUTEX_CMP_REQUEUE_PI:
|
|
/* not yet implemented */
|
|
pem = pem_find(td->td_proc);
|
|
if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
|
|
linux_msg(td,
|
|
"linux_sys_futex: "
|
|
"unsupported futex_pi op\n");
|
|
pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
|
|
LIN_SDT_PROBE0(futex, linux_sys_futex,
|
|
unimplemented_cmp_requeue_pi);
|
|
}
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
|
|
default:
|
|
linux_msg(td,
|
|
"linux_sys_futex: unknown op %d\n", args->op);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
|
|
args->op);
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
|
|
return (ENOSYS);
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
|
|
{
|
|
struct linux_emuldata *em;
|
|
|
|
LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
|
|
|
|
if (args->len != sizeof(struct linux_robust_list_head)) {
|
|
LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
|
|
LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
|
|
return (EINVAL);
|
|
}
|
|
|
|
em = em_find(td);
|
|
em->robust_futexes = args->head;
|
|
|
|
LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
|
|
{
|
|
struct linux_emuldata *em;
|
|
struct linux_robust_list_head *head;
|
|
l_size_t len = sizeof(struct linux_robust_list_head);
|
|
struct thread *td2;
|
|
int error = 0;
|
|
|
|
LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
|
|
|
|
if (!args->pid) {
|
|
em = em_find(td);
|
|
KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
|
|
head = em->robust_futexes;
|
|
} else {
|
|
td2 = tdfind(args->pid, -1);
|
|
if (td2 == NULL) {
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
|
|
ESRCH);
|
|
return (ESRCH);
|
|
}
|
|
if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
|
|
EPERM);
|
|
PROC_UNLOCK(td2->td_proc);
|
|
return (EPERM);
|
|
}
|
|
|
|
em = em_find(td2);
|
|
KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
|
|
/* XXX: ptrace? */
|
|
if (priv_check(td, PRIV_CRED_SETUID) ||
|
|
priv_check(td, PRIV_CRED_SETEUID) ||
|
|
p_candebug(td, td2->td_proc)) {
|
|
PROC_UNLOCK(td2->td_proc);
|
|
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
|
|
EPERM);
|
|
return (EPERM);
|
|
}
|
|
head = em->robust_futexes;
|
|
|
|
PROC_UNLOCK(td2->td_proc);
|
|
}
|
|
|
|
error = copyout(&len, args->len, sizeof(l_size_t));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
|
|
error);
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
|
|
return (EFAULT);
|
|
}
|
|
|
|
error = copyout(&head, args->head, sizeof(head));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
|
|
error);
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
|
|
unsigned int pi)
|
|
{
|
|
uint32_t uval, nval, mval;
|
|
struct futex *f;
|
|
int error;
|
|
|
|
LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
|
|
|
|
retry:
|
|
error = copyin(uaddr, &uval, 4);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
|
|
LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
|
|
return (EFAULT);
|
|
}
|
|
if ((uval & FUTEX_TID_MASK) == em->em_tid) {
|
|
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
|
|
nval = casuword32(uaddr, uval, mval);
|
|
|
|
if (nval == -1) {
|
|
LIN_SDT_PROBE1(futex, handle_futex_death, return,
|
|
EFAULT);
|
|
return (EFAULT);
|
|
}
|
|
|
|
if (nval != uval)
|
|
goto retry;
|
|
|
|
if (!pi && (uval & FUTEX_WAITERS)) {
|
|
error = futex_get(uaddr, NULL, &f,
|
|
FUTEX_DONTCREATE | FUTEX_SHARED);
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, handle_futex_death,
|
|
return, error);
|
|
return (error);
|
|
}
|
|
if (f != NULL) {
|
|
futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
|
|
futex_put(f, NULL);
|
|
}
|
|
}
|
|
}
|
|
|
|
LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
fetch_robust_entry(struct linux_robust_list **entry,
|
|
struct linux_robust_list **head, unsigned int *pi)
|
|
{
|
|
l_ulong uentry;
|
|
int error;
|
|
|
|
LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
|
|
|
|
error = copyin((const void *)head, &uentry, sizeof(l_ulong));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
|
|
LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
|
|
return (EFAULT);
|
|
}
|
|
|
|
*entry = (void *)(uentry & ~1UL);
|
|
*pi = uentry & 1;
|
|
|
|
LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
|
|
return (0);
|
|
}
|
|
|
|
/* This walks the list of robust futexes releasing them. */
|
|
void
|
|
release_futexes(struct thread *td, struct linux_emuldata *em)
|
|
{
|
|
struct linux_robust_list_head *head = NULL;
|
|
struct linux_robust_list *entry, *next_entry, *pending;
|
|
unsigned int limit = 2048, pi, next_pi, pip;
|
|
l_long futex_offset;
|
|
int rc, error;
|
|
|
|
LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
|
|
|
|
head = em->robust_futexes;
|
|
|
|
if (head == NULL) {
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
|
|
if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
|
|
error = copyin(&head->futex_offset, &futex_offset,
|
|
sizeof(futex_offset));
|
|
if (error) {
|
|
LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
|
|
if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
|
|
while (entry != &head->list) {
|
|
rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
|
|
|
|
if (entry != pending)
|
|
if (handle_futex_death(em,
|
|
(uint32_t *)((caddr_t)entry + futex_offset), pi)) {
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
if (rc) {
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
return;
|
|
}
|
|
|
|
entry = next_entry;
|
|
pi = next_pi;
|
|
|
|
if (!--limit)
|
|
break;
|
|
|
|
sched_relinquish(curthread);
|
|
}
|
|
|
|
if (pending)
|
|
handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
|
|
|
|
LIN_SDT_PROBE0(futex, release_futexes, return);
|
|
}
|