450dc149bd
The original range lock implementation had to be modified by commit
8926ab7 because it was unsafe on Linux. In particular, calling
cv_destroy() immediately after cv_broadcast() is dangerous because
the waiters may still be asleep. Thus the following cv_destroy()
will free memory which may still be in use.
This was fixed by updating cv_destroy() to block on waiters but
this in turn introduced a deadlock. The deadlock was resolved
with the use of a taskq to move the offending free outside the
range lock. This worked well but using the taskq for the free
resulted in a serious performance hit. This is somewhat ironic
because at the time I felt using the taskq might improve things
by making the free asynchronous.
This patch refines the original fix and moves the free from the
taskq to a private free list. Then items which must be free'd
are simply inserted into the list. When the range lock is dropped
it's safe to free the items. The list is walked and all rl_t
entries are freed.
This change improves small cached read performance by 26x. This
was expected because for small reads the number of locking calls
goes up significantly. More surprisingly this change significantly
improves large cache read performance. This is probably attributable
to better cpu/memory locality. Very likely the same processor
which allocated the memory is now freeing it.
bs ext3 zfs zfs+fix faster
----------------------------------------------
512 435 3 79 26x
1k 820 7 160 22x
2k 1536 14 305 21x
4k 2764 28 572 20x
8k 3788 50 1024 20x
16k 4300 86 1843 21x
32k 4505 138 2560 18x
64k 5324 252 3891 15x
128k 5427 276 4710 17x
256k 5427 413 5017 12x
512k 5427 497 5324 10x
1m 5427 521 5632 10x
Closes #142
91 lines
2.5 KiB
C
91 lines
2.5 KiB
C
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
|
|
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
|
|
|
|
#ifndef _SYS_FS_ZFS_RLOCK_H
|
|
#define _SYS_FS_ZFS_RLOCK_H
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifdef _KERNEL
|
|
|
|
#include <sys/zfs_znode.h>
|
|
|
|
/*
 * Kind of range lock requested from zfs_range_lock().
 *
 * The enumerators keep their implicit values (0, 1, 2); do not reorder.
 */
typedef enum {
	RL_READER,	/* shared lock */
	RL_WRITER,	/* exclusive lock */
	RL_APPEND	/* exclusive lock; special type converted to WRITER */
} rl_type_t;
|
|
|
|
typedef struct rl {
|
|
znode_t *r_zp; /* znode this lock applies to */
|
|
avl_node_t r_node; /* avl node link */
|
|
uint64_t r_off; /* file range offset */
|
|
uint64_t r_len; /* file range length */
|
|
uint_t r_cnt; /* range reference count in tree */
|
|
rl_type_t r_type; /* range type */
|
|
kcondvar_t r_wr_cv; /* cv for waiting writers */
|
|
kcondvar_t r_rd_cv; /* cv for waiting readers */
|
|
uint8_t r_proxy; /* acting for original range */
|
|
uint8_t r_write_wanted; /* writer wants to lock this range */
|
|
uint8_t r_read_wanted; /* reader wants to lock this range */
|
|
list_node_t rl_node; /* used for deferred release */
|
|
} rl_t;
|
|
|
|
/*
|
|
* Lock a range (offset, length) as either shared (READER)
|
|
* or exclusive (WRITER or APPEND). APPEND is a special type that
|
|
* is converted to WRITER that specified to lock from the start of the
|
|
* end of file. zfs_range_lock() returns the range lock structure.
|
|
*/
|
|
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
|
|
|
|
/*
|
|
* Unlock range and destroy range lock structure.
|
|
*/
|
|
void zfs_range_unlock(rl_t *rl);
|
|
|
|
/*
|
|
* Reduce range locked as RW_WRITER from whole file to specified range.
|
|
* Asserts the whole file was previously locked.
|
|
*/
|
|
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
|
|
|
|
/*
 * AVL comparison function used to compare range locks.
 *
 *	arg1, arg2	opaque pointers to the two entries being compared
 *			(presumably rl_t structures -- confirm against the
 *			implementation).
 *
 * Returns an int ordering result for the AVL tree; the exact values are
 * defined by the implementation, which is not part of this header.
 */
int zfs_range_compare(const void *arg1, const void *arg2);
|
|
|
|
#endif /* _KERNEL */
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _SYS_FS_ZFS_RLOCK_H */
|