Add read-mostly sleepable locks
To be used like rmlocks, but in cases where readers need to be allowed to sleep. See the manpage for more information. Reviewed by: kib (previous version) Differential Revision: https://reviews.freebsd.org/D22823
This commit is contained in:
parent
e99c4e4d64
commit
1f162fef76
@ -26,7 +26,7 @@
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.\" Based on rwlock.9 man page
|
||||
.Dd November 11, 2017
|
||||
.Dd December 27, 2019
|
||||
.Dt RMLOCK 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -43,7 +43,13 @@
|
||||
.Nm rm_sleep ,
|
||||
.Nm rm_assert ,
|
||||
.Nm RM_SYSINIT ,
|
||||
.Nm RM_SYSINIT_FLAGS
|
||||
.Nm RM_SYSINIT_FLAGS ,
|
||||
.Nm rms_init ,
|
||||
.Nm rms_destroy ,
|
||||
.Nm rms_rlock ,
|
||||
.Nm rms_wlock ,
|
||||
.Nm rms_runlock ,
|
||||
.Nm rms_wunlock
|
||||
.Nd kernel reader/writer lock optimized for read-mostly access patterns
|
||||
.Sh SYNOPSIS
|
||||
.In sys/param.h
|
||||
@ -77,6 +83,18 @@
|
||||
.In sys/kernel.h
|
||||
.Fn RM_SYSINIT "name" "struct rmlock *rm" "const char *desc"
|
||||
.Fn RM_SYSINIT_FLAGS "name" "struct rmlock *rm" "const char *desc" "int flags"
|
||||
.Ft void
|
||||
.Fn rms_init "struct rmslock *rms" "const char *name"
|
||||
.Ft void
|
||||
.Fn rms_destroy "struct rmslock *rms"
|
||||
.Ft void
|
||||
.Fn rms_rlock "struct rmslock *rms"
|
||||
.Ft void
|
||||
.Fn rms_wlock "struct rmslock *rms"
|
||||
.Ft void
|
||||
.Fn rms_runlock "struct rmslock *rms"
|
||||
.Ft void
|
||||
.Fn rms_wunlock "struct rmslock *rms"
|
||||
.Sh DESCRIPTION
|
||||
Read-mostly locks allow shared access to protected data by multiple threads,
|
||||
or exclusive access by a single thread.
|
||||
@ -113,22 +131,22 @@ Readers can recurse if the lock is initialized with the
|
||||
option;
|
||||
however, writers are never allowed to recurse.
|
||||
.Pp
|
||||
Sleepable read-mostly locks are created by passing
|
||||
Sleeping for writers can be allowed by passing
|
||||
.Dv RM_SLEEPABLE
|
||||
to
|
||||
.Fn rm_init_flags .
|
||||
Unlike normal read-mostly locks,
|
||||
sleepable read-mostly locks follow the same lock ordering rules as
|
||||
It changes lock ordering rules to the same as for
|
||||
.Xr sx 9
|
||||
locks.
|
||||
Sleepable read-mostly locks do not propagate priority to writers,
|
||||
but they do propagate priority to readers.
|
||||
Writers are permitted to sleep while holding a read-mostly lock,
|
||||
but readers are not.
|
||||
Unlike other sleepable locks such as
|
||||
They do not propagate priority to writers, but they do propagate priority to
|
||||
readers. Note that readers are not permitted to sleep regardless of the flag.
|
||||
.Pp
|
||||
Sleepable read-mostly locks (created with
|
||||
.Fn rms_init )
|
||||
allow sleeping for both readers and writers, but don't do priority propagation
|
||||
for either. They follow
|
||||
.Xr sx 9
|
||||
locks,
|
||||
readers must use try operations on other sleepable locks to avoid sleeping.
|
||||
lock ordering.
|
||||
.Ss Macros and Functions
|
||||
.Bl -tag -width indent
|
||||
.It Fn rm_init "struct rmlock *rm" "const char *name"
|
||||
@ -286,6 +304,43 @@ Assert that the current thread does not hold a recursive lock of
|
||||
.Fa rm .
|
||||
.El
|
||||
.El
|
||||
.Bl -tag -width indent
|
||||
.It Fn rms_init "struct rmslock *rms" "const char *name"
|
||||
Initialize the sleepable read-mostly lock
|
||||
.Fa rms .
|
||||
The
|
||||
.Fa name
|
||||
description is used as
|
||||
.Fa wmesg
|
||||
parameter to the
|
||||
.Xr msleep 9
|
||||
routine.
|
||||
This function must be called before any other operations on the lock.
|
||||
.It Fn rms_rlock "struct rmslock *rms"
|
||||
Lock
|
||||
.Fa rms
|
||||
as a reader.
|
||||
If any thread holds this lock exclusively, the current thread blocks.
|
||||
.It Fn rms_wlock "struct rmslock *rms"
|
||||
Lock
|
||||
.Fa rms
|
||||
as a writer.
|
||||
If the lock is already taken, the current thread blocks.
|
||||
The
|
||||
.Fn rms_wlock
|
||||
function cannot be called recursively.
|
||||
.It Fn rms_runlock "struct rmslock *rms"
|
||||
This function releases a shared lock previously acquired by
|
||||
.Fn rms_rlock .
|
||||
.It Fn rms_wunlock "struct rmslock *rms"
|
||||
This function releases an exclusive lock previously acquired by
|
||||
.Fn rms_wlock .
|
||||
.It Fn rms_destroy "struct rmslock *rms"
|
||||
This function destroys a lock previously initialized with
|
||||
.Fn rms_init .
|
||||
The
|
||||
.Fa rms
|
||||
lock must be unlocked.
|
||||
.Sh SEE ALSO
|
||||
.Xr locking 9 ,
|
||||
.Xr mutex 9 ,
|
||||
|
@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/turnstile.h>
|
||||
#include <sys/lock_profile.h>
|
||||
#include <machine/cpu.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
#ifdef DDB
|
||||
#include <ddb/ddb.h>
|
||||
@ -853,3 +854,241 @@ db_show_rm(const struct lock_object *lock)
|
||||
lc->lc_ddb_show(&rm->rm_wlock_object);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Read-mostly sleepable locks.
|
||||
*
|
||||
* These primitives allow both readers and writers to sleep. However, neither
|
||||
* readers nor writers are tracked and subsequently there is no priority
|
||||
* propagation.
|
||||
*
|
||||
* They are intended to be only used when write-locking is almost never needed
|
||||
* (e.g., they can guard against unloading a kernel module) while read-locking
|
||||
* happens all the time.
|
||||
*
|
||||
* Concurrent writers take turns taking the lock while going off cpu. If this is
|
||||
* of concern for your usecase, this is not the right primitive.
|
||||
*
|
||||
* Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are
|
||||
* inserted to prevent reordering of generated code. Execution ordering is
|
||||
* provided with the use of an IPI handler.
|
||||
*/
|
||||
|
||||
void
|
||||
rms_init(struct rmslock *rms, const char *name)
|
||||
{
|
||||
|
||||
rms->writers = 0;
|
||||
rms->readers = 0;
|
||||
mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
|
||||
rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
|
||||
rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
|
||||
}
|
||||
|
||||
void
|
||||
rms_destroy(struct rmslock *rms)
|
||||
{
|
||||
|
||||
MPASS(rms->writers == 0);
|
||||
MPASS(rms->readers == 0);
|
||||
mtx_destroy(&rms->mtx);
|
||||
uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu);
|
||||
uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx);
|
||||
}
|
||||
|
||||
static void __noinline
|
||||
rms_rlock_fallback(struct rmslock *rms)
|
||||
{
|
||||
|
||||
(*zpcpu_get(rms->readers_influx)) = 0;
|
||||
critical_exit();
|
||||
|
||||
mtx_lock(&rms->mtx);
|
||||
MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
|
||||
while (rms->writers > 0)
|
||||
msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
|
||||
(*zpcpu_get(rms->readers_pcpu))++;
|
||||
mtx_unlock(&rms->mtx);
|
||||
}
|
||||
|
||||
void
|
||||
rms_rlock(struct rmslock *rms)
|
||||
{
|
||||
int *influx;
|
||||
|
||||
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
|
||||
|
||||
critical_enter();
|
||||
influx = zpcpu_get(rms->readers_influx);
|
||||
__compiler_membar();
|
||||
*influx = 1;
|
||||
__compiler_membar();
|
||||
if (__predict_false(rms->writers > 0)) {
|
||||
rms_rlock_fallback(rms);
|
||||
return;
|
||||
}
|
||||
__compiler_membar();
|
||||
(*zpcpu_get(rms->readers_pcpu))++;
|
||||
__compiler_membar();
|
||||
*influx = 0;
|
||||
critical_exit();
|
||||
}
|
||||
|
||||
static void __noinline
|
||||
rms_runlock_fallback(struct rmslock *rms)
|
||||
{
|
||||
|
||||
(*zpcpu_get(rms->readers_influx)) = 0;
|
||||
critical_exit();
|
||||
|
||||
mtx_lock(&rms->mtx);
|
||||
MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
|
||||
MPASS(rms->writers > 0);
|
||||
MPASS(rms->readers > 0);
|
||||
rms->readers--;
|
||||
if (rms->readers == 0)
|
||||
wakeup_one(&rms->writers);
|
||||
mtx_unlock(&rms->mtx);
|
||||
}
|
||||
|
||||
void
|
||||
rms_runlock(struct rmslock *rms)
|
||||
{
|
||||
int *influx;
|
||||
|
||||
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
|
||||
|
||||
critical_enter();
|
||||
influx = zpcpu_get(rms->readers_influx);
|
||||
__compiler_membar();
|
||||
*influx = 1;
|
||||
__compiler_membar();
|
||||
if (__predict_false(rms->writers > 0)) {
|
||||
rms_runlock_fallback(rms);
|
||||
return;
|
||||
}
|
||||
__compiler_membar();
|
||||
(*zpcpu_get(rms->readers_pcpu))--;
|
||||
__compiler_membar();
|
||||
*influx = 0;
|
||||
critical_exit();
|
||||
}
|
||||
|
||||
/*
 * Argument block passed to rms_wlock_IPI() by rms_wlock_switch().
 */
struct rmslock_ipi {
|
||||
/* Lock whose per-CPU reader counts are being collected. */
struct rmslock *rms;
|
||||
/*
 * CPUs whose count still has to be collected; a CPU clears its own bit
 * in the handler once its count has been folded into rms->readers.
 */
cpuset_t signal;
|
||||
};
|
||||
|
||||
static void
|
||||
rms_wlock_IPI(void *arg)
|
||||
{
|
||||
struct rmslock_ipi *rmsipi;
|
||||
struct rmslock *rms;
|
||||
int readers;
|
||||
|
||||
rmsipi = arg;
|
||||
rms = rmsipi->rms;
|
||||
|
||||
if (*zpcpu_get(rms->readers_influx))
|
||||
return;
|
||||
readers = zpcpu_replace(rms->readers_pcpu, 0);
|
||||
if (readers != 0)
|
||||
atomic_add_int(&rms->readers, readers);
|
||||
CPU_CLR_ATOMIC(curcpu, &rmsipi->signal);
|
||||
}
|
||||
|
||||
static void
|
||||
rms_wlock_switch(struct rmslock *rms)
|
||||
{
|
||||
struct rmslock_ipi rmsipi;
|
||||
int *in_op;
|
||||
int cpu;
|
||||
|
||||
MPASS(rms->readers == 0);
|
||||
MPASS(rms->writers == 1);
|
||||
|
||||
rmsipi.rms = rms;
|
||||
|
||||
/*
|
||||
* Publishes rms->writers. rlock and runlock will get this ordered
|
||||
* via IPI in the worst case.
|
||||
*/
|
||||
atomic_thread_fence_rel();
|
||||
|
||||
/*
|
||||
* Collect reader counts from all CPUs using an IPI. The handler can
|
||||
* find itself running while the interrupted CPU was doing either
|
||||
* rlock or runlock in which case it will fail.
|
||||
*
|
||||
* Successful attempts clear the cpu id in the bitmap.
|
||||
*
|
||||
* In case of failure we observe all failing CPUs not executing there to
|
||||
* determine when to make the next attempt. Note that threads having
|
||||
* the var set have preemption disabled. Setting of readers_influx
|
||||
* only uses compiler barriers making these loads unreliable, which is
|
||||
* fine -- the IPI handler will always see the correct result.
|
||||
*
|
||||
* We retry until all counts are collected. Forward progress is
|
||||
* guaranteed by that fact that the total number of threads which can
|
||||
* be caught like this is finite and they all are going to block on
|
||||
* their own.
|
||||
*/
|
||||
CPU_COPY(&all_cpus, &rmsipi.signal);
|
||||
for (;;) {
|
||||
smp_rendezvous_cpus(
|
||||
rmsipi.signal,
|
||||
smp_no_rendezvous_barrier,
|
||||
rms_wlock_IPI,
|
||||
smp_no_rendezvous_barrier,
|
||||
&rmsipi);
|
||||
|
||||
if (CPU_EMPTY(&rmsipi.signal))
|
||||
break;
|
||||
|
||||
CPU_FOREACH(cpu) {
|
||||
if (!CPU_ISSET(cpu, &rmsipi.signal))
|
||||
continue;
|
||||
in_op = zpcpu_get_cpu(rms->readers_influx, cpu);
|
||||
while (atomic_load_int(in_op))
|
||||
cpu_spinwait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rms_wlock(struct rmslock *rms)
|
||||
{
|
||||
|
||||
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
|
||||
|
||||
mtx_lock(&rms->mtx);
|
||||
rms->writers++;
|
||||
if (rms->writers > 1) {
|
||||
msleep(&rms->writers, &rms->mtx, PUSER - 1 | PDROP, mtx_name(&rms->mtx), 0);
|
||||
MPASS(rms->readers == 0);
|
||||
return;
|
||||
}
|
||||
|
||||
rms_wlock_switch(rms);
|
||||
|
||||
if (rms->readers > 0)
|
||||
msleep(&rms->writers, &rms->mtx, PUSER - 1 | PDROP, mtx_name(&rms->mtx), 0);
|
||||
else
|
||||
mtx_unlock(&rms->mtx);
|
||||
MPASS(rms->readers == 0);
|
||||
}
|
||||
|
||||
void
|
||||
rms_wunlock(struct rmslock *rms)
|
||||
{
|
||||
|
||||
mtx_lock(&rms->mtx);
|
||||
MPASS(rms->writers >= 1);
|
||||
MPASS(rms->readers == 0);
|
||||
rms->writers--;
|
||||
if (rms->writers > 0)
|
||||
wakeup_one(&rms->writers);
|
||||
else
|
||||
wakeup(&rms->readers);
|
||||
mtx_unlock(&rms->mtx);
|
||||
}
|
||||
|
@ -68,4 +68,14 @@ struct rm_priotracker {
|
||||
LIST_ENTRY(rm_priotracker) rmp_qentry;
|
||||
};
|
||||
|
||||
#include <sys/_mutex.h>
|
||||
|
||||
/*
 * Sleepable read-mostly lock, operated on by the rms_*() functions.
 */
struct rmslock {
|
||||
/* Protects the slow paths; also the interlock for sleeping. */
struct mtx mtx;
|
||||
/* Number of writers holding or waiting for the lock. */
int writers;
|
||||
/* Reader count folded in from the per-CPU counters by a writer. */
int readers;
|
||||
/* Per-CPU reader count, updated by the reader fast paths. */
int *readers_pcpu;
|
||||
/* Per-CPU flag, set while that CPU is inside rlock/runlock. */
int *readers_influx;
|
||||
};
|
||||
|
||||
#endif /* !_SYS__RMLOCK_H_ */
|
||||
|
@ -133,5 +133,12 @@ struct rm_args {
|
||||
#define rm_assert(rm, what)
|
||||
#endif
|
||||
|
||||
/*
 * Sleepable read-mostly lock interface.  rms_init() must precede any other
 * operation on the lock; rms_destroy() expects the lock to be unlocked.
 */
void rms_init(struct rmslock *rms, const char *name);
|
||||
void rms_destroy(struct rmslock *rms);
|
||||
/* Acquire/release for reading. */
void rms_rlock(struct rmslock *rms);
|
||||
void rms_runlock(struct rmslock *rms);
|
||||
/* Acquire/release for writing. */
void rms_wlock(struct rmslock *rms);
|
||||
void rms_wunlock(struct rmslock *rms);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* !_SYS_RMLOCK_H_ */
|
||||
|
Loading…
x
Reference in New Issue
Block a user