Add read-mostly sleepable locks

To be used like rmlocks, except when sleeping for readers needs to be
allowed. See the manpage for more information.

Reviewed by:	kib (previous version)
Differential Revision:	https://reviews.freebsd.org/D22823
Mateusz Guzik 2019-12-27 11:19:57 +00:00
parent e99c4e4d64
commit 1f162fef76
4 changed files with 323 additions and 12 deletions

share/man/man9/rmlock.9

@@ -26,7 +26,7 @@
.\" $FreeBSD$
.\"
.\" Based on rwlock.9 man page
.Dd December 27, 2019
.Dt RMLOCK 9
.Os
.Sh NAME
@@ -43,7 +43,13 @@
.Nm rm_sleep ,
.Nm rm_assert ,
.Nm RM_SYSINIT ,
.Nm RM_SYSINIT_FLAGS ,
.Nm rms_init ,
.Nm rms_destroy ,
.Nm rms_rlock ,
.Nm rms_wlock ,
.Nm rms_runlock ,
.Nm rms_wunlock
.Nd kernel reader/writer lock optimized for read-mostly access patterns
.Sh SYNOPSIS
.In sys/param.h
@@ -77,6 +83,18 @@
.In sys/kernel.h
.Fn RM_SYSINIT "name" "struct rmlock *rm" "const char *desc"
.Fn RM_SYSINIT_FLAGS "name" "struct rmlock *rm" "const char *desc" "int flags"
.Ft void
.Fn rms_init "struct rmslock *rms" "const char *name"
.Ft void
.Fn rms_destroy "struct rmslock *rms"
.Ft void
.Fn rms_rlock "struct rmslock *rms"
.Ft void
.Fn rms_wlock "struct rmslock *rms"
.Ft void
.Fn rms_runlock "struct rmslock *rms"
.Ft void
.Fn rms_wunlock "struct rmslock *rms"
.Sh DESCRIPTION
Read-mostly locks allow shared access to protected data by multiple threads,
or exclusive access by a single thread.
@@ -113,22 +131,22 @@ Readers can recurse if the lock is initialized with the
option;
however, writers are never allowed to recurse.
.Pp
Sleeping for writers can be allowed by passing
.Dv RM_SLEEPABLE
to
.Fn rm_init_flags .
This changes the lock ordering rules to match those of
.Xr sx 9
locks.
Such locks do not propagate priority to writers, but they do propagate priority
to readers.
Note that readers are not permitted to sleep regardless of the flag.
.Pp
Sleepable read-mostly locks (created with
.Fn rms_init )
allow sleeping for both readers and writers, but do not propagate priority
to either.
They follow the same lock ordering rules as
.Xr sx 9
locks.
.Ss Macros and Functions
.Bl -tag -width indent
.It Fn rm_init "struct rmlock *rm" "const char *name"
@@ -286,6 +304,43 @@ Assert that the current thread does not hold a recursive lock of
.Fa rm .
.El
.El
.Bl -tag -width indent
.It Fn rms_init "struct rmslock *rms" "const char *name"
Initialize the sleepable read-mostly lock
.Fa rms .
The
.Fa name
description is used as the
.Fa wmesg
parameter to the
.Xr msleep 9
routine.
This function must be called before any other operations on the lock.
.It Fn rms_rlock "struct rmslock *rms"
Lock
.Fa rms
as a reader.
If any thread holds this lock exclusively, the current thread blocks.
.It Fn rms_wlock "struct rmslock *rms"
Lock
.Fa rms
as a writer.
If the lock is already taken, the current thread blocks.
The
.Fn rms_wlock
function cannot be called recursively.
.It Fn rms_runlock "struct rmslock *rms"
This function releases a shared lock previously acquired by
.Fn rms_rlock .
.It Fn rms_wunlock "struct rmslock *rms"
This function releases an exclusive lock previously acquired by
.Fn rms_wlock .
.It Fn rms_destroy "struct rmslock *rms"
This function destroys a lock previously initialized with
.Fn rms_init .
The
.Fa rms
lock must be unlocked.
.El
.Sh SEE ALSO
.Xr locking 9 ,
.Xr mutex 9 ,
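
To make the interface documented above concrete, the following is a minimal
usage sketch of a kernel subsystem guarding a read-mostly lookup table.
Everything prefixed foo_ (the entry structure, the list, the lock instance and
the functions) is an illustrative assumption and not part of this commit; only
the rms_init, rms_rlock, rms_runlock, rms_wlock, rms_wunlock and rms_destroy
calls come from the interface above.

/*
 * Hypothetical consumer of the rms interface documented above;
 * everything named foo_* is illustrative.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

struct foo_entry {
        LIST_ENTRY(foo_entry) link;
        int key;
        int value;
};

static LIST_HEAD(, foo_entry) foo_head = LIST_HEAD_INITIALIZER(foo_head);
static struct rmslock foo_rms;

static void
foo_init(void)
{

        rms_init(&foo_rms, "foo_rms");
}

/*
 * Read path, expected to be frequent.  Unlike rm_rlock(), rms_rlock()
 * leaves the thread free to sleep while the lock is held.
 */
static int
foo_lookup(int key, int *valuep)
{
        struct foo_entry *fe;
        int found;

        found = 0;
        rms_rlock(&foo_rms);
        LIST_FOREACH(fe, &foo_head, link) {
                if (fe->key == key) {
                        *valuep = fe->value;
                        found = 1;
                        break;
                }
        }
        rms_runlock(&foo_rms);
        return (found);
}

/*
 * Write path, expected to be rare.  The writer excludes all readers
 * and other writers and may also sleep.
 */
static void
foo_insert(struct foo_entry *fe)
{

        rms_wlock(&foo_rms);
        LIST_INSERT_HEAD(&foo_head, fe, link);
        rms_wunlock(&foo_rms);
}

static void
foo_fini(void)
{

        rms_destroy(&foo_rms);
}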

sys/kern/kern_rmlock.c

@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>
#include <vm/uma.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -853,3 +854,241 @@ db_show_rm(const struct lock_object *lock)
        lc->lc_ddb_show(&rm->rm_wlock_object);
}
#endif

/*
 * Read-mostly sleepable locks.
 *
 * These primitives allow both readers and writers to sleep. However, neither
 * readers nor writers are tracked and subsequently there is no priority
 * propagation.
 *
 * They are intended to be only used when write-locking is almost never needed
 * (e.g., they can guard against unloading a kernel module) while read-locking
 * happens all the time.
 *
 * Concurrent writers take turns taking the lock while going off cpu. If this is
 * of concern for your usecase, this is not the right primitive.
 *
 * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are
 * inserted to prevent reordering of generated code. Execution ordering is
 * provided with the use of an IPI handler.
 */

void
rms_init(struct rmslock *rms, const char *name)
{

        rms->writers = 0;
        rms->readers = 0;
        mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
        rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
        rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
}

void
rms_destroy(struct rmslock *rms)
{

        MPASS(rms->writers == 0);
        MPASS(rms->readers == 0);
        mtx_destroy(&rms->mtx);
        uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu);
        uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx);
}

static void __noinline
rms_rlock_fallback(struct rmslock *rms)
{

        (*zpcpu_get(rms->readers_influx)) = 0;
        critical_exit();

        mtx_lock(&rms->mtx);
        MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
        while (rms->writers > 0)
                msleep(&rms->readers, &rms->mtx, PUSER - 1,
                    mtx_name(&rms->mtx), 0);
        (*zpcpu_get(rms->readers_pcpu))++;
        mtx_unlock(&rms->mtx);
}

void
rms_rlock(struct rmslock *rms)
{
        int *influx;

        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

        critical_enter();
        influx = zpcpu_get(rms->readers_influx);
        __compiler_membar();
        *influx = 1;
        __compiler_membar();
        if (__predict_false(rms->writers > 0)) {
                rms_rlock_fallback(rms);
                return;
        }
        __compiler_membar();
        (*zpcpu_get(rms->readers_pcpu))++;
        __compiler_membar();
        *influx = 0;
        critical_exit();
}

static void __noinline
rms_runlock_fallback(struct rmslock *rms)
{

        (*zpcpu_get(rms->readers_influx)) = 0;
        critical_exit();

        mtx_lock(&rms->mtx);
        MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
        MPASS(rms->writers > 0);
        MPASS(rms->readers > 0);
        rms->readers--;
        if (rms->readers == 0)
                wakeup_one(&rms->writers);
        mtx_unlock(&rms->mtx);
}

void
rms_runlock(struct rmslock *rms)
{
        int *influx;

        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

        critical_enter();
        influx = zpcpu_get(rms->readers_influx);
        __compiler_membar();
        *influx = 1;
        __compiler_membar();
        if (__predict_false(rms->writers > 0)) {
                rms_runlock_fallback(rms);
                return;
        }
        __compiler_membar();
        (*zpcpu_get(rms->readers_pcpu))--;
        __compiler_membar();
        *influx = 0;
        critical_exit();
}

struct rmslock_ipi {
        struct rmslock *rms;
        cpuset_t signal;
};

static void
rms_wlock_IPI(void *arg)
{
        struct rmslock_ipi *rmsipi;
        struct rmslock *rms;
        int readers;

        rmsipi = arg;
        rms = rmsipi->rms;
        if (*zpcpu_get(rms->readers_influx))
                return;
        readers = zpcpu_replace(rms->readers_pcpu, 0);
        if (readers != 0)
                atomic_add_int(&rms->readers, readers);
        CPU_CLR_ATOMIC(curcpu, &rmsipi->signal);
}

static void
rms_wlock_switch(struct rmslock *rms)
{
        struct rmslock_ipi rmsipi;
        int *in_op;
        int cpu;

        MPASS(rms->readers == 0);
        MPASS(rms->writers == 1);

        rmsipi.rms = rms;

        /*
         * Publishes rms->writers. rlock and runlock will get this ordered
         * via IPI in the worst case.
         */
        atomic_thread_fence_rel();

        /*
         * Collect reader counts from all CPUs using an IPI. The handler can
         * find itself running while the interrupted CPU was doing either
         * rlock or runlock in which case it will fail.
         *
         * Successful attempts clear the cpu id in the bitmap.
         *
         * In case of failure we observe all failing CPUs not executing there to
         * determine when to make the next attempt. Note that threads having
         * the var set have preemption disabled. Setting of readers_influx
         * only uses compiler barriers making these loads unreliable, which is
         * fine -- the IPI handler will always see the correct result.
         *
         * We retry until all counts are collected. Forward progress is
         * guaranteed by the fact that the total number of threads which can
         * be caught like this is finite and they all are going to block on
         * their own.
         */
        CPU_COPY(&all_cpus, &rmsipi.signal);
        for (;;) {
                smp_rendezvous_cpus(
                    rmsipi.signal,
                    smp_no_rendezvous_barrier,
                    rms_wlock_IPI,
                    smp_no_rendezvous_barrier,
                    &rmsipi);
                if (CPU_EMPTY(&rmsipi.signal))
                        break;
                CPU_FOREACH(cpu) {
                        if (!CPU_ISSET(cpu, &rmsipi.signal))
                                continue;
                        in_op = zpcpu_get_cpu(rms->readers_influx, cpu);
                        while (atomic_load_int(in_op))
                                cpu_spinwait();
                }
        }
}

void
rms_wlock(struct rmslock *rms)
{

        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

        mtx_lock(&rms->mtx);
        rms->writers++;
        if (rms->writers > 1) {
                msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP,
                    mtx_name(&rms->mtx), 0);
                MPASS(rms->readers == 0);
                return;
        }

        rms_wlock_switch(rms);

        if (rms->readers > 0)
                msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP,
                    mtx_name(&rms->mtx), 0);
        else
                mtx_unlock(&rms->mtx);
        MPASS(rms->readers == 0);
}

void
rms_wunlock(struct rmslock *rms)
{

        mtx_lock(&rms->mtx);
        MPASS(rms->writers >= 1);
        MPASS(rms->readers == 0);
        rms->writers--;
        if (rms->writers > 0)
                wakeup_one(&rms->writers);
        else
                wakeup(&rms->readers);
        mtx_unlock(&rms->mtx);
}
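
As an aid to following rms_wlock_switch() above: while the lock is only
read-locked, the reader count is split between rms->readers and the per-CPU
readers_pcpu counters, and the IPI handler folds the per-CPU part into
rms->readers so that the writer has a single counter to wait on.  The helper
below is a hypothetical restatement of that bookkeeping, not part of the
commit; it is only meaningful with rms->mtx held and with no CPU currently
marked in readers_influx.

/*
 * Hypothetical illustration (not part of the commit): the number of
 * granted read locks equals the central counter plus the sum of the
 * per-CPU counters.  rms_wlock_switch() drains the per-CPU part into
 * rms->readers so rms_wlock() can sleep until it reaches zero.
 */
static int
rms_total_readers(struct rmslock *rms)
{
        int cpu, total;

        mtx_assert(&rms->mtx, MA_OWNED);
        total = rms->readers;
        CPU_FOREACH(cpu)
                total += *zpcpu_get_cpu(rms->readers_pcpu, cpu);
        return (total);
}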

sys/sys/_rmlock.h

@@ -68,4 +68,14 @@ struct rm_priotracker {
        LIST_ENTRY(rm_priotracker) rmp_qentry;
};

#include <sys/_mutex.h>

struct rmslock {
        struct mtx mtx;
        int writers;
        int readers;
        int *readers_pcpu;
        int *readers_influx;
};

#endif /* !_SYS__RMLOCK_H_ */

sys/sys/rmlock.h

@@ -133,5 +133,12 @@ struct rm_args {
#define rm_assert(rm, what)
#endif

void rms_init(struct rmslock *rms, const char *name);
void rms_destroy(struct rmslock *rms);
void rms_rlock(struct rmslock *rms);
void rms_runlock(struct rmslock *rms);
void rms_wlock(struct rmslock *rms);
void rms_wunlock(struct rmslock *rms);
#endif /* _KERNEL */
#endif /* !_SYS_RMLOCK_H_ */
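
Finally, the implementation comment above names the intended use case:
read-locking on every operation while write-locking only to fence off kernel
module unload.  A sketch of that pattern follows; the foo_ops, foo_call and
foo_unregister names and the indirection through a method table are
illustrative assumptions, not part of this commit.

/*
 * Hypothetical sketch of the module-unload use case; the foo_* names
 * are illustrative.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

struct foo_ops {
        int (*fo_handle)(void *arg);
};

static struct rmslock foo_ops_rms;      /* rms_init()ed at boot */
static struct foo_ops *foo_ops;         /* registered by a loadable module */

/*
 * Hot path: dispatch into the module.  Holding the read lock keeps the
 * module resident, and the handler itself is free to sleep.
 */
static int
foo_call(void *arg)
{
        int error;

        rms_rlock(&foo_ops_rms);
        if (foo_ops != NULL)
                error = foo_ops->fo_handle(arg);
        else
                error = EOPNOTSUPP;
        rms_runlock(&foo_ops_rms);
        return (error);
}

/*
 * Cold path: called from the module's MOD_UNLOAD handler.  Acquiring
 * the write lock waits for all current readers to drain, and clearing
 * the pointer before releasing it keeps later readers out, so no thread
 * is executing the module's code via foo_call() once this returns.
 */
static void
foo_unregister(void)
{

        rms_wlock(&foo_ops_rms);
        foo_ops = NULL;
        rms_wunlock(&foo_ops_rms);
}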