diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
index 8522f43cae74..ffae1130fd88 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
@@ -78,6 +78,8 @@ void rangelock_fini(rangelock_t *);
 
 locked_range_t *rangelock_enter(rangelock_t *,
     uint64_t, uint64_t, rangelock_type_t);
+locked_range_t *rangelock_tryenter(rangelock_t *,
+    uint64_t, uint64_t, rangelock_type_t);
 void rangelock_exit(locked_range_t *);
 void rangelock_reduce(locked_range_t *, uint64_t, uint64_t);
 
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
index 214c46b06886..434be78ffce2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
@@ -136,10 +136,11 @@ rangelock_fini(rangelock_t *rl)
 }
 
 /*
- * Check if a write lock can be grabbed, or wait and recheck until available.
+ * Check if a write lock can be grabbed. If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-rangelock_enter_writer(rangelock_t *rl, locked_range_t *new)
+static boolean_t
+rangelock_enter_writer(rangelock_t *rl, locked_range_t *new, boolean_t nonblock)
 {
 	avl_tree_t *tree = &rl->rl_tree;
 	locked_range_t *lr;
@@ -169,7 +170,7 @@ rangelock_enter_writer(rangelock_t *rl, locked_range_t *new)
 	 */
 	if (avl_numnodes(tree) == 0) {
 		avl_add(tree, new);
-		return;
+		return (B_TRUE);
 	}
 
 	/*
@@ -190,8 +191,10 @@
 			goto wait;
 
 		avl_insert(tree, new, where);
-		return;
+		return (B_TRUE);
 wait:
+		if (nonblock)
+			return (B_FALSE);
 		if (!lr->lr_write_wanted) {
 			cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
 			lr->lr_write_wanted = B_TRUE;
@@ -373,10 +376,11 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new,
 }
 
 /*
- * Check if a reader lock can be grabbed, or wait and recheck until available.
+ * Check if a reader lock can be grabbed. If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-rangelock_enter_reader(rangelock_t *rl, locked_range_t *new)
+static boolean_t
+rangelock_enter_reader(rangelock_t *rl, locked_range_t *new, boolean_t nonblock)
 {
 	avl_tree_t *tree = &rl->rl_tree;
 	locked_range_t *prev, *next;
@@ -397,6 +401,8 @@
 	 */
 	if (prev && (off < prev->lr_offset + prev->lr_length)) {
 		if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
+			if (nonblock)
+				return (B_FALSE);
 			if (!prev->lr_read_wanted) {
 				cv_init(&prev->lr_read_cv,
 				    NULL, CV_DEFAULT, NULL);
@@ -421,6 +427,8 @@
 		if (off + len <= next->lr_offset)
 			goto got_lock;
 		if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
+			if (nonblock)
+				return (B_FALSE);
 			if (!next->lr_read_wanted) {
 				cv_init(&next->lr_read_cv,
 				    NULL, CV_DEFAULT, NULL);
@@ -439,6 +447,7 @@
 	 * locks and bumping ref counts (r_count).
 	 */
	rangelock_add_reader(tree, new, prev, where);
+	return (B_TRUE);
 }
 
 /*
@@ -448,13 +457,13 @@
  * the range lock structure for later unlocking (or reduce range if the
  * entire file is locked as RL_WRITER).
  */
-locked_range_t *
-rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
-    rangelock_type_t type)
+static locked_range_t *
+_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type, boolean_t nonblock)
 {
 	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-	locked_range_t *new = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+	locked_range_t *new = kmem_alloc(sizeof (*new), KM_SLEEP);
 	new->lr_rangelock = rl;
 	new->lr_offset = off;
 	if (len + off < off)	/* overflow */
@@ -471,16 +480,34 @@ rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
 		/*
 		 * First check for the usual case of no locks
 		 */
-		if (avl_numnodes(&rl->rl_tree) == 0)
+		if (avl_numnodes(&rl->rl_tree) == 0) {
 			avl_add(&rl->rl_tree, new);
-		else
-			rangelock_enter_reader(rl, new);
-	} else
-		rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */
+		} else if (!rangelock_enter_reader(rl, new, nonblock)) {
+			kmem_free(new, sizeof (*new));
+			new = NULL;
+		}
+	} else if (!rangelock_enter_writer(rl, new, nonblock)) {
+		kmem_free(new, sizeof (*new));
+		new = NULL;
+	}
 	mutex_exit(&rl->rl_lock);
 	return (new);
 }
 
+locked_range_t *
+rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type)
+{
+	return (_rangelock_enter(rl, off, len, type, B_FALSE));
+}
+
+locked_range_t *
+rangelock_tryenter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type)
+{
+	return (_rangelock_enter(rl, off, len, type, B_TRUE));
+}
+
 /*
  * Unlock a reader lock
  */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index dfdcfcdb3cad..3610cdaa323a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -4498,13 +4498,27 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
 
 	/*
-	 * Lock a range covering all required and optional pages.
-	 * Note that we need to handle the case of the block size growing.
+	 * Try to lock a range covering all required and optional pages, to
+	 * handle the case of the block size growing. It is not safe to block
+	 * on the range lock since the owner may be waiting for the fault page
+	 * to be unbusied.
 	 */
 	for (;;) {
 		blksz = zp->z_blksz;
-		lr = rangelock_enter(&zp->z_rangelock, rounddown(start, blksz),
+		lr = rangelock_tryenter(&zp->z_rangelock,
+		    rounddown(start, blksz),
 		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+		if (lr == NULL) {
+			if (rahead != NULL) {
+				*rahead = 0;
+				rahead = NULL;
+			}
+			if (rbehind != NULL) {
+				*rbehind = 0;
+				rbehind = NULL;
+			}
+			break;
+		}
 		if (blksz == zp->z_blksz)
 			break;
 		rangelock_exit(lr);
@@ -4515,7 +4529,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	obj_size = object->un_pager.vnp.vnp_size;
 	zfs_vmobject_wunlock(object);
 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
-		rangelock_exit(lr);
+		if (lr != NULL)
+			rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (zfs_vm_pagerret_bad);
 	}
@@ -4543,7 +4558,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
 	    MIN(end, obj_size) - (end - PAGE_SIZE));
 
-	rangelock_exit(lr);
+	if (lr != NULL)
+		rangelock_exit(lr);
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 	ZFS_EXIT(zfsvfs);
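
Usage sketch, not part of the patch: rangelock_tryenter() has the same contract
as rangelock_enter(), except that it returns NULL immediately instead of
sleeping when the requested range conflicts with an existing lock or a waiting
writer. A minimal caller might look like the following; the helper name
zfs_read_range_nowait() and the EAGAIN fallback are hypothetical, while
znode_t, z_rangelock, and the range-lock calls are as used above:

	/*
	 * Sketch: take a shared lock on [off, off + len) without sleeping,
	 * for contexts where blocking on the range lock could deadlock.
	 */
	static int
	zfs_read_range_nowait(znode_t *zp, uint64_t off, uint64_t len)
	{
		locked_range_t *lr;

		lr = rangelock_tryenter(&zp->z_rangelock, off, len, RL_READER);
		if (lr == NULL) {
			/*
			 * Contended: the lock holder may in turn be waiting
			 * for a resource this thread holds, so fail rather
			 * than sleep.
			 */
			return (EAGAIN);	/* hypothetical fallback */
		}
		/* ... read from the locked range here ... */
		rangelock_exit(lr);
		return (0);
	}

This mirrors the zfs_getpages() change above: when the try-lock fails, the
caller degrades gracefully (there, by zeroing *rbehind/*rahead and servicing
only the required pages without the range lock), and every subsequent
rangelock_exit() must tolerate lr == NULL.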