Add a rangelock implementation, intended to be used for range-locking
the I/O regions of the vnode data space. The implementation is quite
simple-minded: it keeps a list of the lock requests, ordered by arrival
time. Each request may be for read or for write. The implementation is
fair FIFO.

MFC after:	2 months
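A minimal usage sketch, assuming the vn_rangelock_*() wrappers added to sys/sys/vnode.h below; the surrounding function, its arguments, and the elided I/O are illustrative only and not part of this change:

/*
 * Hypothetical caller: serialize an I/O request over the byte range
 * [offset, offset + len) of the vnode data space.  The opaque cookie
 * returned by the lock routine must be handed back to the unlock.
 */
static int
example_vn_io(struct vnode *vp, off_t offset, off_t len)
{
	void *rl_cookie;
	int error;

	/* Sleeps until no earlier conflicting request is queued (fair FIFO). */
	rl_cookie = vn_rangelock_rlock(vp, offset, offset + len);
	error = 0;	/* ... perform the actual read of [offset, offset + len) ... */
	vn_rangelock_unlock(vp, rl_cookie);
	return (error);
}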
commit 6f4e16f833
parent b4b050eda2
@@ -2562,6 +2562,7 @@ kern/kern_priv.c	standard
kern/kern_proc.c	standard
kern/kern_prot.c	standard
kern/kern_racct.c	standard
kern/kern_rangelock.c	standard
kern/kern_rctl.c	standard
kern/kern_resource.c	standard
kern/kern_rmlock.c	standard
sys/kern/kern_rangelock.c (new file, 246 lines)
@@ -0,0 +1,246 @@
/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t	rl_q_start, rl_q_end;
	int	rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Verifies the supplied rl_q_entries for compatibility.  Returns true
 * if the rangelock queue entries are not compatible, false if they are.
 *
 * Two entries are compatible if their ranges do not overlap, or both
 * entries are for read.
 */
static int
rangelock_incompatible(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
		return (0);
	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}

/*
 * Recalculate the lock->rl_currdep after an unlock.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *entry1, *whead;

	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
	    lock->rl_currdep != NULL)
		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
	for (entry = lock->rl_currdep; entry != NULL;
	    entry = TAILQ_NEXT(entry, rl_q_link)) {
		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
			if (rangelock_incompatible(entry, entry1))
				goto out;
			if (entry1 == entry)
				break;
		}
	}
out:
	lock->rl_currdep = entry;
	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
		if (whead == lock->rl_currdep)
			break;
		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
			whead->rl_q_flags |= RL_LOCK_GRANTED;
			wakeup(whead);
		}
	}
}

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);
	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}

/*
 * Unlock the sub-range of granted lock.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}

/*
 * Add the lock request to the queue of the pending requests for
 * rangelock.  Sleep until the request can be granted.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
		msleep(entry, ilk, 0, "range", 0);
	mtx_unlock(ilk);
	return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/resourcevar.h>
#include <sys/sdt.h>
#include <sys/smp.h>
@@ -205,6 +206,7 @@ thread_init(void *mem, int size, int flags)

	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_rlqe = NULL;
	EVENTHANDLER_INVOKE(thread_init, td);
	td->td_sched = (struct td_sched *)&td[1];
	umtx_thread_init(td);
@@ -222,6 +224,7 @@ thread_fini(void *mem, int size)

	td = (struct thread *)mem;
	EVENTHANDLER_INVOKE(thread_fini, td);
	rlqentry_free(td->td_rlqe);
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
@@ -1027,6 +1027,7 @@ alloc:
		if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
			vp->v_vflag |= VV_NOKNOTE;
	}
	rangelock_init(&vp->v_rl);

	*vpp = vp;
	return (0);
@@ -2468,6 +2469,7 @@ vdropl(struct vnode *vp)
	/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
	vp->v_op = NULL;
#endif
	rangelock_destroy(&vp->v_rl);
	lockdestroy(vp->v_vnlock);
	mtx_destroy(&vp->v_interlock);
	mtx_destroy(BO_MTX(bo));
@@ -213,6 +213,7 @@ struct thread {
	struct seltd	*td_sel;	/* Select queue/channel. */
	struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
	struct turnstile *td_turnstile;	/* (k) Associated turnstile. */
	struct rl_q_entry *td_rlqe;	/* (k) Associated range lock entry. */
	struct umtx_q	*td_umtxq;	/* (c?) Link for when we're blocked. */
	lwpid_t		td_tid;		/* (b) Thread ID. */
	sigqueue_t	td_sigqueue;	/* (c) Sigs arrived, not delivered. */
sys/sys/rangelock.h (new file, 78 lines)
@@ -0,0 +1,78 @@
/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _SYS_RANGELOCK_H
#define	_SYS_RANGELOCK_H

#include <sys/queue.h>

#define	RL_LOCK_READ		0x0001
#define	RL_LOCK_WRITE		0x0002
#define	RL_LOCK_TYPE_MASK	0x0003
#define	RL_LOCK_GRANTED		0x0004

struct rl_q_entry;

/*
 * The structure representing the range lock.  Caller may request
 * read or write access to the range of bytes.  Access is granted if
 * all existing lock owners are compatible with the request.  Two lock
 * owners are compatible if their ranges do not overlap, or both
 * owners are for read.
 *
 * Access to the structure itself is synchronized with the externally
 * supplied mutex.
 *
 * rl_waiters is the queue of lock requests in the order of arrival.
 * rl_currdep is the first lock request that cannot be granted now due
 * to the preceding requests conflicting with it.
 */
struct rangelock {
	TAILQ_HEAD(, rl_q_entry) rl_waiters;
	struct rl_q_entry	*rl_currdep;
};

#ifdef _KERNEL

struct mtx;

void	 rangelock_init(struct rangelock *lock);
void	 rangelock_destroy(struct rangelock *lock);
void	 rangelock_unlock(struct rangelock *lock, void *cookie,
	    struct mtx *ilk);
void	*rangelock_unlock_range(struct rangelock *lock, void *cookie,
	    off_t start, off_t end, struct mtx *ilk);
void	*rangelock_rlock(struct rangelock *lock, off_t start, off_t end,
	    struct mtx *ilk);
void	*rangelock_wlock(struct rangelock *lock, off_t start, off_t end,
	    struct mtx *ilk);
void	 rlqentry_free(struct rl_q_entry *rlqe);

#endif	/* _KERNEL */

#endif	/* _SYS_RANGELOCK_H */
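The compatibility rule spelled out in the header comment above can be restated as a tiny self-contained check. This is only an illustration of the rule (requests conflict exactly when their half-open ranges overlap and at least one of them is a write), not code from this commit:

#include <stdbool.h>
#include <stdio.h>

/* Simplified restatement: both-read pairs never conflict; otherwise a
 * conflict exists iff the half-open ranges [s1, e1) and [s2, e2) overlap. */
static bool
ranges_conflict(long s1, long e1, bool w1, long s2, long e2, bool w2)
{
	if (!w1 && !w2)
		return (false);
	return (s1 < e2 && e1 > s2);
}

int
main(void)
{
	/* write [50, 60) vs. read [0, 100): overlap with a writer -> conflict (1) */
	printf("%d\n", ranges_conflict(50, 60, true, 0, 100, false));
	/* read [0, 50) vs. read [25, 75): both readers -> compatible (0) */
	printf("%d\n", ranges_conflict(0, 50, false, 25, 75, false));
	return (0);
}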
@@ -38,6 +38,7 @@
#include <sys/lock.h>
#include <sys/lockmgr.h>
#include <sys/mutex.h>
#include <sys/rangelock.h>
#include <sys/selinfo.h>
#include <sys/uio.h>
#include <sys/acl.h>
@@ -165,6 +166,7 @@ struct vnode {
	struct vpollinfo *v_pollinfo;	/* i Poll events, p for *v_pi */
	struct label *v_label;		/* MAC label for vnode */
	struct lockf *v_lockf;		/* Byte-level advisory lock list */
	struct rangelock v_rl;		/* Byte-range lock */
};

#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
@@ -679,6 +681,15 @@ int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
int	vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
	    struct vnode **rvp);

#define	vn_rangelock_unlock(vp, cookie)		\
	rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
#define	vn_rangelock_unlock_range(vp, cookie, start, end)	\
	rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end),	\
	    VI_MTX(vp))
#define	vn_rangelock_rlock(vp, start, end)	\
	rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
#define	vn_rangelock_wlock(vp, start, end)	\
	rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))

int	vfs_cache_lookup(struct vop_lookup_args *ap);
void	vfs_timestamp(struct timespec *);