Replace the dummy implementation of RCU in the LinuxKPI with one based on
the in-kernel Concurrency Kit's ck_epoch API. Factor the RCU hlist_xxx()
functions into their own rculist.h header file.

Obtained from:		kmacy @
MFC after:		1 week
Sponsored by:		Mellanox Technologies
This commit is contained in:
hselasky 2017-02-21 18:04:21 +00:00
parent c829c2411a
commit f407fff4de
8 changed files with 425 additions and 97 deletions

View File

@ -0,0 +1,85 @@
/*-
* Copyright (c) 2015 François Tigeot
* Copyright (c) 2016-2017 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _LINUX_RCULIST_H_
#define _LINUX_RCULIST_H_
#include <linux/list.h>
#include <linux/rcupdate.h>
/*
 * Lvalue accessors for the link fields of an hlist. Each macro takes the
 * address of the field, casts it to "struct hlist_node **" and dereferences
 * it, so the result can be used both as a value and as the target of
 * rcu_assign_pointer().
 *
 * hlist_first_rcu(head) - the "first" pointer of a list head
 * hlist_next_rcu(node)  - the "next" pointer of a node
 * hlist_pprev_rcu(node) - the pointer that "pprev" of a node points at
 */
#define hlist_first_rcu(head) (*((struct hlist_node **)(&(head)->first)))
#define hlist_next_rcu(node) (*((struct hlist_node **)(&(node)->next)))
#define hlist_pprev_rcu(node) (*((struct hlist_node **)((node)->pprev)))
static inline void
hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev)
{
n->next = prev->next;
n->pprev = &prev->next;
rcu_assign_pointer(hlist_next_rcu(prev), n);
if (n->next)
n->next->pprev = &n->next;
}
/*
 * Iterate over the entries of an hlist. This expands directly to the
 * plain hlist_for_each_entry() iterator; no extra read-side primitive is
 * applied by this macro itself.
 */
#define hlist_for_each_entry_rcu(pos, head, member) \
hlist_for_each_entry(pos, head, member)
static inline void
hlist_del_rcu(struct hlist_node *n)
{
struct hlist_node *next = n->next;
struct hlist_node **pprev = n->pprev;
WRITE_ONCE(*pprev, next);
if (next)
next->pprev = pprev;
}
static inline void
hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h)
{
struct hlist_node *first = h->first;
n->next = first;
n->pprev = &h->first;
rcu_assign_pointer(hlist_first_rcu(h), n);
if (first)
first->pprev = &n->next;
}
/*
 * Unlink node "n" if it is currently on a list and mark it as unhashed
 * by clearing its "pprev" pointer. Nodes that are already unhashed are
 * left alone.
 */
static inline void
hlist_del_init_rcu(struct hlist_node *n)
{
	if (hlist_unhashed(n))
		return;

	hlist_del_rcu(n);
	n->pprev = NULL;
}
#endif /* _LINUX_RCULIST_H_ */

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016 Mellanox Technologies, Ltd.
* Copyright (c) 2016-2017 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -28,70 +28,73 @@
#ifndef _LINUX_RCUPDATE_H_
#define _LINUX_RCUPDATE_H_
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <linux/compiler.h>
#include <linux/types.h>
extern struct sx linux_global_rcu_lock;
#include <machine/atomic.h>
struct rcu_head {
};
#define LINUX_KFREE_RCU_OFFSET_MAX 4096 /* exclusive */
typedef void (*rcu_callback_t)(struct rcu_head *);
#define RCU_INITIALIZER(v) \
((typeof(*(v)) __force __rcu *)(v))
static inline void
call_rcu(struct rcu_head *ptr, rcu_callback_t func)
{
sx_xlock(&linux_global_rcu_lock);
func(ptr);
sx_xunlock(&linux_global_rcu_lock);
}
static inline void
rcu_read_lock(void)
{
sx_slock(&linux_global_rcu_lock);
}
static inline void
rcu_read_unlock(void)
{
sx_sunlock(&linux_global_rcu_lock);
}
static inline void
rcu_barrier(void)
{
sx_xlock(&linux_global_rcu_lock);
sx_xunlock(&linux_global_rcu_lock);
}
static inline void
synchronize_rcu(void)
{
sx_xlock(&linux_global_rcu_lock);
sx_xunlock(&linux_global_rcu_lock);
}
#define hlist_add_head_rcu(n, h) \
do { \
sx_xlock(&linux_global_rcu_lock); \
hlist_add_head(n, h); \
sx_xunlock(&linux_global_rcu_lock); \
#define RCU_INIT_POINTER(p, v) do { \
(p) = (v); \
} while (0)
#define hlist_del_init_rcu(n) \
do { \
sx_xlock(&linux_global_rcu_lock); \
hlist_del_init(n); \
sx_xunlock(&linux_global_rcu_lock); \
#define call_rcu(ptr, func) do { \
linux_call_rcu(ptr, func); \
} while (0)
#define hlist_del_rcu(n) \
do { \
sx_xlock(&linux_global_rcu_lock); \
hlist_del(n); \
sx_xunlock(&linux_global_rcu_lock); \
#define rcu_barrier(void) do { \
linux_rcu_barrier(); \
} while (0)
/*
 * Linux RCU entry points, each forwarding to the matching linux_*()
 * function declared at the end of this header. The single macro
 * parameter is literally named "void" so that the macros are invoked
 * with empty parentheses, e.g. rcu_read_lock().
 *
 * synchronize_rcu_expedited() forwards to the same function as
 * synchronize_rcu().
 */
#define rcu_read_lock(void) do { \
linux_rcu_read_lock(); \
} while (0)
#define rcu_read_unlock(void) do { \
linux_rcu_read_unlock(); \
} while (0)
#define synchronize_rcu(void) do { \
linux_synchronize_rcu(); \
} while (0)
#define synchronize_rcu_expedited(void) do { \
linux_synchronize_rcu(); \
} while (0)
/*
 * Schedule "ptr" to be freed through call_rcu().
 *
 * "rcu_head" is the name of the struct rcu_head member inside *ptr.
 * Instead of a real callback, the byte offset of that member is cast to
 * rcu_callback_t and passed as the function argument. The compile-time
 * assertion guarantees the offset is below LINUX_KFREE_RCU_OFFSET_MAX,
 * which is the threshold linux_rcu_destroy_object() uses to tell such an
 * offset apart from a genuine function pointer.
 */
#define kfree_rcu(ptr, rcu_head) do { \
CTASSERT(offsetof(__typeof(*(ptr)), rcu_head) < \
LINUX_KFREE_RCU_OFFSET_MAX); \
call_rcu(&(ptr)->rcu_head, (rcu_callback_t)(uintptr_t) \
offsetof(__typeof(*(ptr)), rcu_head)); \
} while (0)
#define rcu_access_pointer(p) \
((typeof(*p) __force __kernel *)(READ_ONCE(p)))
/*
 * Fetch an RCU-managed pointer when the caller already excludes
 * concurrent updates. The condition "c" is accepted for source
 * compatibility but is not evaluated, and the pointer is read with a
 * plain load.
 */
#define rcu_dereference_protected(p, c) \
	((typeof(*p) __force __kernel *)(p))
/*
 * Fetch an RCU-managed pointer from within a read-side section.
 *
 * The pointer may be updated concurrently by rcu_assign_pointer(), so it
 * is loaded exactly once through READ_ONCE(), as rcu_access_pointer()
 * does. This prevents the compiler from re-reading or tearing the load.
 */
#define rcu_dereference(p) \
	((typeof(*p) __force __kernel *)(READ_ONCE(p)))
#define rcu_pointer_handoff(p) (p)
/*
 * Publish value "v" through the RCU-managed pointer "p".
 *
 * The store is performed with atomic_store_rel_ptr(), i.e. with release
 * semantics, so that the initialisation of the object being published is
 * ordered before the pointer becomes visible. "p" must be an lvalue
 * because its address is taken.
 */
#define rcu_assign_pointer(p, v) do { \
atomic_store_rel_ptr((volatile uintptr_t *)&(p), \
(uintptr_t)(v)); \
} while (0)
/* prototypes */
extern void linux_call_rcu(struct rcu_head *ptr, rcu_callback_t func);
extern void linux_rcu_barrier(void);
extern void linux_rcu_read_lock(void);
extern void linux_rcu_read_unlock(void);
extern void linux_synchronize_rcu(void);
#endif /* _LINUX_RCUPDATE_H_ */

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2015 Mellanox Technologies, Ltd.
* Copyright (c) 2015-2017 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -25,48 +25,22 @@
*
* $FreeBSD$
*/
#ifndef _LINUX_SRCU_H_
#define _LINUX_SRCU_H_
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>
struct ck_epoch_record;
struct srcu_struct {
struct sx sx;
struct ck_epoch_record *ss_epoch_record;
};
static inline int
init_srcu_struct(struct srcu_struct *srcu)
{
sx_init(&srcu->sx, "SleepableRCU");
return (0);
}
/* prototypes */
static inline void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
sx_destroy(&srcu->sx);
}
static inline int
srcu_read_lock(struct srcu_struct *srcu)
{
sx_slock(&srcu->sx);
return (0);
}
static inline void
srcu_read_unlock(struct srcu_struct *srcu, int key)
{
sx_sunlock(&srcu->sx);
}
static inline void
synchronize_srcu(struct srcu_struct *srcu)
{
sx_xlock(&srcu->sx);
sx_xunlock(&srcu->sx);
}
extern int srcu_read_lock(struct srcu_struct *);
extern void srcu_read_unlock(struct srcu_struct *, int index);
extern void synchronize_srcu(struct srcu_struct *);
extern int init_srcu_struct(struct srcu_struct *);
extern void cleanup_srcu_struct(struct srcu_struct *);
extern void srcu_barrier(struct srcu_struct *);
#endif /* _LINUX_SRCU_H_ */

View File

@ -63,6 +63,12 @@ typedef u64 phys_addr_t;
#define DECLARE_BITMAP(n, bits) \
unsigned long n[howmany(bits, sizeof(long) * 8)]
/*
 * Opaque, pointer-aligned storage that clients embed in their objects and
 * hand to call_rcu() / kfree_rcu(). The LinuxKPI RCU implementation casts
 * it to its private "struct callback_head" and asserts at compile time
 * that this buffer is large enough to hold that structure.
 */
struct rcu_head {
void *raw[8];
} __aligned(sizeof(void *));
typedef void (*rcu_callback_t)(struct rcu_head *head);
typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
typedef int linux_task_fn_t(void *data);
#endif /* _LINUX_TYPES_H_ */

View File

@ -96,7 +96,6 @@ struct list_head pci_drivers;
struct list_head pci_devices;
struct net init_net;
spinlock_t pci_lock;
struct sx linux_global_rcu_lock;
unsigned long linux_timer_hz_mask;
@ -1474,7 +1473,6 @@ linux_compat_init(void *arg)
#if defined(__i386__) || defined(__amd64__)
linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
sx_init(&linux_global_rcu_lock, "LinuxGlobalRCU");
rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
@ -1507,7 +1505,6 @@ linux_compat_uninit(void *arg)
linux_kobject_kfree_name(&linux_class_misc.kobj);
synchronize_rcu();
sx_destroy(&linux_global_rcu_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

View File

@ -0,0 +1,259 @@
/*-
* Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <ck_epoch.h>
#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/kernel.h>
/*
 * Private view of the storage provided by "struct rcu_head".
 * linux_call_rcu() casts the caller's rcu_head to this type.
 */
struct callback_head {
ck_epoch_entry_t epoch_entry;	/* entry passed to ck_epoch_call() */
rcu_callback_t func;		/* callback, or a kfree_rcu() offset */
ck_epoch_record_t *epoch_record;	/* record the entry was queued on */
struct task task;		/* runs linux_rcu_cleaner_func() */
};
/*
* Verify that "struct rcu_head" is big enough to hold "struct
* callback_head". This has been done to avoid having to add special
* compile flags for including ck_epoch.h to all clients of the
* LinuxKPI.
*/
CTASSERT(sizeof(struct rcu_head) >= sizeof(struct callback_head));
/* Global epoch shared by all records; set up in linux_rcu_runtime_init(). */
static ck_epoch_t linux_epoch;
/* Malloc type used for every epoch record allocated in this file. */
static MALLOC_DEFINE(M_LRCU, "lrcu", "Linux RCU");
/* Per-CPU epoch record used by the RCU read lock and synchronize paths. */
static DPCPU_DEFINE(ck_epoch_record_t *, epoch_record);
/*
 * Boot-time initialisation of the LinuxKPI RCU backend.
 *
 * Initialises the global epoch, gives every CPU its own registered
 * record, and then registers and immediately unregisters a further
 * 5 * mp_ncpus records, presumably so that ck_epoch_recycle() in
 * linux_rcu_get_record() has spare records to hand out later.
 */
static void
linux_rcu_runtime_init(void *arg __unused)
{
	ck_epoch_record_t **slot;
	ck_epoch_record_t *rec;
	int cpu;
	int spare;

	ck_epoch_init(&linux_epoch);

	/* One permanently registered record per CPU. */
	CPU_FOREACH(cpu) {
		rec = malloc(sizeof(*rec), M_LRCU, M_WAITOK | M_ZERO);
		ck_epoch_register(&linux_epoch, rec);
		slot = DPCPU_ID_PTR(cpu, epoch_record);
		*slot = rec;
	}

	/* Populate the epoch with 5 * mp_ncpus unregistered records. */
	for (spare = 0; spare < 5 * mp_ncpus; spare++) {
		rec = malloc(sizeof(*rec), M_LRCU, M_WAITOK | M_ZERO);
		ck_epoch_register(&linux_epoch, rec);
		ck_epoch_unregister(rec);
	}
}
SYSINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_init, NULL);
/*
 * Tear down the LinuxKPI RCU backend.
 *
 * Frees every record that ck_epoch_recycle() is still able to return and
 * then frees the per-CPU records, clearing each per-CPU pointer first.
 */
static void
linux_rcu_runtime_uninit(void *arg __unused)
{
	ck_epoch_record_t **slot;
	ck_epoch_record_t *rec;
	int cpu;

	/* Release all records obtainable through recycling. */
	for (;;) {
		rec = ck_epoch_recycle(&linux_epoch);
		if (rec == NULL)
			break;
		free(rec, M_LRCU);
	}

	/* Release the per-CPU records. */
	CPU_FOREACH(cpu) {
		slot = DPCPU_ID_PTR(cpu, epoch_record);
		rec = *slot;
		*slot = NULL;
		free(rec, M_LRCU);
	}
}
SYSUNINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_uninit, NULL);
/*
 * Obtain a registered epoch record.
 *
 * A recycled record is preferred. Otherwise a new record is allocated
 * without sleeping; if that fails and "canblock" is non-zero the
 * allocation is retried with M_WAITOK. Newly allocated records are
 * registered with the global epoch before being returned.
 *
 * Returns NULL only when no record could be recycled, the non-sleeping
 * allocation failed and "canblock" is zero.
 */
static ck_epoch_record_t *
linux_rcu_get_record(int canblock)
{
	ck_epoch_record_t *rec;

	rec = ck_epoch_recycle(&linux_epoch);
	if (__predict_true(rec != NULL))
		return (rec);

	rec = malloc(sizeof(*rec), M_LRCU, M_NOWAIT | M_ZERO);
	if (rec == NULL) {
		if (!canblock)
			return (NULL);
		rec = malloc(sizeof(*rec), M_LRCU, M_WAITOK | M_ZERO);
	}
	ck_epoch_register(&linux_epoch, rec);
	return (rec);
}
/*
 * Epoch callback registered by linux_call_rcu().
 *
 * The stored "func" is either a real callback or, for kfree_rcu(), the
 * offset of the rcu_head inside the enclosing object. Values below
 * LINUX_KFREE_RCU_OFFSET_MAX are treated as offsets and the enclosing
 * object is released with kfree(); anything else is called as a function
 * with the rcu_head as its argument.
 */
static void
linux_rcu_destroy_object(ck_epoch_entry_t *e)
{
	struct callback_head *head =
	    container_of(e, struct callback_head, epoch_entry);
	uintptr_t value = (uintptr_t)head->func;

	MPASS(head->task.ta_pending == 0);

	if (value >= LINUX_KFREE_RCU_OFFSET_MAX) {
		head->func((struct rcu_head *)head);
		return;
	}
	kfree((char *)head - value);
}
/*
 * Taskqueue handler queued by linux_call_rcu().
 *
 * The record pointer is copied out of the callback head before the
 * barrier is issued, because the head is not touched again afterwards:
 * the barrier may run linux_rcu_destroy_object(), which can free the
 * memory the head lives in. The record is unregistered once the barrier
 * has completed.
 */
static void
linux_rcu_cleaner_func(void *context, int pending __unused)
{
	ck_epoch_record_t *rec;

	rec = ((struct callback_head *)context)->epoch_record;

	ck_epoch_barrier(rec);
	ck_epoch_unregister(rec);
}
/*
 * Enter an RCU read-side section.
 *
 * The thread is pinned to its current CPU first so that the per-CPU
 * epoch record fetched here is the same one linux_rcu_read_unlock()
 * will see; the pin is dropped again in linux_rcu_read_unlock().
 */
void
linux_rcu_read_lock(void)
{
	ck_epoch_record_t *cpu_rec;

	sched_pin();

	cpu_rec = DPCPU_GET(epoch_record);
	MPASS(cpu_rec != NULL);
	ck_epoch_begin(cpu_rec, NULL);
}
/*
 * Leave an RCU read-side section: end the epoch section on the current
 * CPU's record and release the CPU pin taken by linux_rcu_read_lock().
 */
void
linux_rcu_read_unlock(void)
{
	ck_epoch_record_t *cpu_rec;

	cpu_rec = DPCPU_GET(epoch_record);
	ck_epoch_end(cpu_rec, NULL);

	sched_unpin();
}
/*
 * Wait for a grace period by calling ck_epoch_synchronize() on the
 * current CPU's epoch record. The thread stays pinned to its CPU for the
 * duration of the call.
 */
void
linux_synchronize_rcu(void)
{
	ck_epoch_record_t *cpu_rec;

	sched_pin();

	cpu_rec = DPCPU_GET(epoch_record);
	MPASS(cpu_rec != NULL);
	ck_epoch_synchronize(cpu_rec);

	sched_unpin();
}
/*
 * Implementation of rcu_barrier(): borrow an epoch record, issue a
 * ck_epoch_barrier() on it and hand the record back.
 *
 * The record is requested with blocking allowed. A non-blocking request
 * can return NULL when nothing can be recycled and memory is short, and
 * that NULL would be passed straight to ck_epoch_barrier(). rcu_barrier()
 * is itself a waiting primitive, so sleeping for memory here is
 * acceptable.
 */
void
linux_rcu_barrier(void)
{
	ck_epoch_record_t *record;

	record = linux_rcu_get_record(1);
	ck_epoch_barrier(record);
	ck_epoch_unregister(record);
}
/*
 * Implementation of call_rcu(): arrange for "func" to be applied to
 * "context" later.
 *
 * The caller's rcu_head is reinterpreted as a struct callback_head. A
 * private epoch record is obtained, the callback is queued on it with
 * ck_epoch_call(), and a task is enqueued on taskqueue_fast whose
 * handler, linux_rcu_cleaner_func(), issues the barrier and returns the
 * record.
 *
 * NOTE(review): the record is requested without blocking and is only
 * checked through MPASS(); linux_rcu_get_record(0) can return NULL.
 */
void
linux_call_rcu(struct rcu_head *context, rcu_callback_t func)
{
	struct callback_head *head;
	ck_epoch_record_t *rec;

	head = (struct callback_head *)context;
	rec = linux_rcu_get_record(0);

	sched_pin();
	MPASS(rec != NULL);

	head->func = func;
	head->epoch_record = rec;
	ck_epoch_call(rec, &head->epoch_entry, linux_rcu_destroy_object);

	TASK_INIT(&head->task, 0, linux_rcu_cleaner_func, head);
	taskqueue_enqueue(taskqueue_fast, &head->task);
	sched_unpin();
}
/*
 * Initialise a sleepable-RCU structure by attaching an epoch record.
 *
 * The record is requested with blocking allowed so that a NULL record is
 * never stored. A non-blocking request could fail while this function
 * still reported success, leaving srcu_read_lock() and friends to
 * dereference NULL.
 *
 * Always returns 0.
 */
int
init_srcu_struct(struct srcu_struct *srcu)
{
	srcu->ss_epoch_record = linux_rcu_get_record(1);
	return (0);
}
/*
 * Release the epoch record attached to a sleepable-RCU structure.
 *
 * The record pointer is cleared before the record is unregistered. A
 * structure whose record pointer is already NULL (for example after a
 * previous cleanup) is ignored rather than passed to
 * ck_epoch_unregister().
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
	ck_epoch_record_t *record;

	record = srcu->ss_epoch_record;
	if (record == NULL)
		return;

	srcu->ss_epoch_record = NULL;
	ck_epoch_unregister(record);
}
/*
 * Enter an SRCU read-side section on the record attached to "srcu".
 * The returned key is always 0.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	struct ck_epoch_record *rec = srcu->ss_epoch_record;

	ck_epoch_begin(rec, NULL);
	return (0);
}
/*
 * Leave an SRCU read-side section on the record attached to "srcu".
 * The key returned by srcu_read_lock() is not used.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
{
	struct ck_epoch_record *rec = srcu->ss_epoch_record;

	ck_epoch_end(rec, NULL);
}
/*
 * Wait for a grace period by calling ck_epoch_synchronize() on the
 * record attached to "srcu".
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{
	struct ck_epoch_record *rec = srcu->ss_epoch_record;

	ck_epoch_synchronize(rec);
}

View File

@ -4278,6 +4278,8 @@ compat/linuxkpi/common/src/linux_idr.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C}"
compat/linuxkpi/common/src/linux_radix.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C}"
compat/linuxkpi/common/src/linux_rcu.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C} -I$S/contrib/ck/include"
compat/linuxkpi/common/src/linux_usb.c optional compat_linuxkpi usb \
compile-with "${LINUXKPI_C}"

View File

@ -8,6 +8,7 @@ SRCS= linux_kmod.c \
linux_kthread.c \
linux_pci.c \
linux_radix.c \
linux_rcu.c \
linux_tasklet.c \
linux_idr.c \
linux_usb.c
@ -20,5 +21,6 @@ SRCS+= bus_if.h \
opt_usb.h
CFLAGS+= -I${.CURDIR}/../../compat/linuxkpi/common/include
CFLAGS+= -I${.CURDIR}/../../contrib/ck/include
.include <bsd.kmod.mk>