From e3680954376d380b897066a542ba7cf0b7ba9124 Mon Sep 17 00:00:00 2001
From: Rick Macklem
Date: Thu, 27 Jun 2019 23:10:40 +0000
Subject: [PATCH] Add non-blocking trylock variants for the rangelock functions.

A future patch that will add a Linux-compatible copy_file_range(2)
syscall needs to lock the byte ranges of two files concurrently. To do
this without risk of deadlock, a non-blocking variant of
vn_rangelock_rlock() called vn_rangelock_tryrlock() is needed. This
patch adds it, along with vn_rangelock_trywlock().

The patch also adds a couple of comments that I hope clarify how the
algorithm used in kern_rangelock.c works.

Reviewed by:    kib, asomers (previous version)
Differential Revision:  https://reviews.freebsd.org/D20645
---
 sys/kern/kern_rangelock.c | 71 +++++++++++++++++++++++++++++++++------
 sys/sys/rangelock.h       |  4 +++
 sys/sys/vnode.h           |  4 +++
 3 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
index 35bd6e864d37..b434ac4b4c1c 100644
--- a/sys/kern/kern_rangelock.c
+++ b/sys/kern/kern_rangelock.c
@@ -141,15 +141,33 @@ rangelock_calc_block(struct rangelock *lock)
 
 static void
 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
-    struct mtx *ilk)
+    struct mtx *ilk, bool do_calc_block)
 {
 
         MPASS(lock != NULL && entry != NULL && ilk != NULL);
         mtx_assert(ilk, MA_OWNED);
-        KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
+
+        if (!do_calc_block) {
+                /*
+                 * This is the case where rangelock_enqueue() has been called
+                 * with trylock == true and just inserted this entry in the
+                 * queue.
+                 * If rl_currdep is this entry, rl_currdep needs to
+                 * be set to the next entry in the rl_waiters list.
+                 * However, since this entry is the last entry in the
+                 * list, the next entry is NULL.
+                 */
+                if (lock->rl_currdep == entry) {
+                        KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
+                            ("rangelock_enqueue: next entry not NULL"));
+                        lock->rl_currdep = NULL;
+                }
+        } else
+                KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
 
         TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
-        rangelock_calc_block(lock);
+        if (do_calc_block)
+                rangelock_calc_block(lock);
         mtx_unlock(ilk);
         if (curthread->td_rlqe == NULL)
                 curthread->td_rlqe = entry;
@@ -164,7 +182,7 @@ rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
         MPASS(lock != NULL && cookie != NULL && ilk != NULL);
 
         mtx_lock(ilk);
-        rangelock_unlock_locked(lock, cookie, ilk);
+        rangelock_unlock_locked(lock, cookie, ilk, true);
 }
 
 /*
@@ -185,7 +203,7 @@ rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
 
         mtx_lock(ilk);
         if (entry->rl_q_end == end) {
-                rangelock_unlock_locked(lock, cookie, ilk);
+                rangelock_unlock_locked(lock, cookie, ilk, true);
                 return (NULL);
         }
         entry->rl_q_end = end;
@@ -196,11 +214,11 @@ rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
 
 /*
  * Add the lock request to the queue of the pending requests for
- * rangelock. Sleep until the request can be granted.
+ * rangelock. Sleep until the request can be granted unless trylock == true.
 */
 static void *
 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
-    struct mtx *ilk)
+    struct mtx *ilk, bool trylock)
 {
         struct rl_q_entry *entry;
         struct thread *td;
@@ -226,11 +244,28 @@ rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
          */
 
         TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
+
+        /*
+         * If rl_currdep == NULL, there is no entry waiting for a conflicting
+         * range to be resolved, so set rl_currdep to this entry.  If there is
+         * no conflicting entry for this entry, rl_currdep will be set back to
+         * NULL by rangelock_calc_block().
+         */
         if (lock->rl_currdep == NULL)
                 lock->rl_currdep = entry;
         rangelock_calc_block(lock);
-        while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
+        while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
+                if (trylock) {
+                        /*
+                         * For this case, the range is not actually locked
+                         * yet, but removal from the list requires the same
+                         * steps, except for not doing a rangelock_calc_block()
+                         * call, since rangelock_calc_block() was called above.
+                         */
+                        rangelock_unlock_locked(lock, entry, ilk, false);
+                        return (NULL);
+                }
                 msleep(entry, ilk, 0, "range", 0);
+        }
         mtx_unlock(ilk);
         return (entry);
 }
@@ -239,12 +274,28 @@ void *
 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
 {
 
-        return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
+        return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
+}
+
+void *
+rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
+    struct mtx *ilk)
+{
+
+        return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
 }
 
 void *
 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
 {
 
-        return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
+        return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
+}
+
+void *
+rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
+    struct mtx *ilk)
+{
+
+        return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
 }
diff --git a/sys/sys/rangelock.h b/sys/sys/rangelock.h
index 732bd9406d48..9a8a107aed8f 100644
--- a/sys/sys/rangelock.h
+++ b/sys/sys/rangelock.h
@@ -75,8 +75,12 @@ void *rangelock_unlock_range(struct rangelock *lock, void *cookie,
     off_t start, off_t end, struct mtx *ilk);
 void *rangelock_rlock(struct rangelock *lock, off_t start, off_t end,
     struct mtx *ilk);
+void *rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
+    struct mtx *ilk);
 void *rangelock_wlock(struct rangelock *lock, off_t start, off_t end,
     struct mtx *ilk);
+void *rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
+    struct mtx *ilk);
 void rlqentry_free(struct rl_q_entry *rlqe);
 
 #endif /* _KERNEL */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 0ed2ffc96fb7..cfde146f6765 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -720,8 +720,12 @@ int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
             VI_MTX(vp))
 #define vn_rangelock_rlock(vp, start, end)                              \
         rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_tryrlock(vp, start, end)                           \
+        rangelock_tryrlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 #define vn_rangelock_wlock(vp, start, end)                              \
         rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_trywlock(vp, start, end)                           \
+        rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 
 int vfs_cache_lookup(struct vop_lookup_args *ap);
 void vfs_timestamp(struct timespec *);
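
A note on intended use: the deadlock-avoidance pattern that motivates the trylock
variants looks roughly like the sketch below. This is illustrative only and not part
of the patch; the helper name lock_two_ranges() and its argument list are invented
for the example, it assumes the usual kernel includes (sys/param.h, sys/vnode.h),
and it relies on the pre-existing vn_rangelock_unlock() macro together with the
macros added above. The idea is to sleep for the range on the first vnode, try-lock
the range on the second, and, if the trylock fails (returns NULL), drop the first
lock and retry so whoever holds the conflicting range can make progress.

/*
 * Illustrative sketch only (not from this patch): lock a byte range on
 * each of two vnodes without risking deadlock.  The helper name and its
 * signature are hypothetical; the eventual copy_file_range(2) code may
 * differ.
 */
static void
lock_two_ranges(struct vnode *invp, struct vnode *outvp, off_t start,
    off_t end, void **inrlp, void **outrlp)
{

        for (;;) {
                /* Sleep until the range on the first vnode is granted. */
                *inrlp = vn_rangelock_rlock(invp, start, end);
                /* Non-blocking attempt on the second vnode. */
                *outrlp = vn_rangelock_trywlock(outvp, start, end);
                if (*outrlp != NULL)
                        return;
                /*
                 * A conflicting range is held on outvp.  Drop the lock on
                 * invp before retrying, so the holder of the conflicting
                 * range can take whatever it needs on invp and eventually
                 * release outvp; sleeping on outvp while still holding the
                 * invp range is what could deadlock.
                 */
                vn_rangelock_unlock(invp, *inrlp);
        }
}

A real caller would also want to bound the retries or fall back to acquiring both
ranges in a fixed order, so that two threads copying in opposite directions cannot
livelock; that is beyond the scope of this sketch.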