5497 lock contention on arcs_mtx
Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Approved by: Dan McDonald <danmcd@omniti.com> Author: Prakash Surya <prakash.surya@delphix.com> illumos/illumos-gate@244781f10d
This commit is contained in:
parent
fd97c9198f
commit
1a3dd08e88
@ -505,6 +505,7 @@ extern void delay(clock_t ticks);
|
||||
} while (0);
|
||||
|
||||
#define max_ncpus 64
|
||||
#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
|
||||
|
||||
#define minclsyspri 60
|
||||
#define maxclsyspri 99
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -315,7 +315,14 @@ dsl_pool_close(dsl_pool_t *dp)
|
||||
txg_list_destroy(&dp->dp_sync_tasks);
|
||||
txg_list_destroy(&dp->dp_dirty_dirs);
|
||||
|
||||
arc_flush(dp->dp_spa);
|
||||
/*
|
||||
* We can't set retry to TRUE since we're explicitly specifying
|
||||
* a spa to flush. This is good enough; any missed buffers for
|
||||
* this spa won't cause trouble, and they'll eventually fall
|
||||
* out of the ARC just like any other unused buffer.
|
||||
*/
|
||||
arc_flush(dp->dp_spa, FALSE);
|
||||
|
||||
txg_fini(dp);
|
||||
dsl_scan_fini(dp);
|
||||
dmu_buf_user_evict_wait();
|
||||
|
366
uts/common/fs/zfs/multilist.c
Normal file
366
uts/common/fs/zfs/multilist.c
Normal file
@ -0,0 +1,366 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/multilist.h>
|
||||
|
||||
/* needed for spa_get_random() */
|
||||
#include <sys/spa.h>
|
||||
|
||||
/*
|
||||
* Given the object contained on the list, return a pointer to the
|
||||
* multilist_node_t structure embedded within that object.
|
||||
*/
|
||||
static multilist_node_t *
|
||||
multilist_d2l(multilist_t *ml, void *obj)
|
||||
{
|
||||
return ((multilist_node_t *)((char *)obj + ml->ml_offset));
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a new multilist using the parameters specified.
|
||||
*
|
||||
* - 'size' denotes the size of the structure containing the
|
||||
* multilist_node_t.
|
||||
* - 'offset' denotes the byte offset of the multilist_node_t within
|
||||
* the structure that contains it.
|
||||
* - 'num' specifies the number of internal sublists to create.
|
||||
* - 'index_func' is used to determine which sublist to insert into
|
||||
* when the multilist_insert() function is called; as well as which
|
||||
* sublist to remove from when multilist_remove() is called. The
|
||||
* requirements this function must meet, are the following:
|
||||
*
|
||||
* - It must always return the same value when called on the same
|
||||
* object (to ensure the object is removed from the list it was
|
||||
* inserted into).
|
||||
*
|
||||
* - It must return a value in the range [0, number of sublists).
|
||||
* The multilist_get_num_sublists() function may be used to
|
||||
* determine the number of sublists in the multilist.
|
||||
*
|
||||
* Also, in order to reduce internal contention between the sublists
|
||||
* during insertion and removal, this function should choose evenly
|
||||
* between all available sublists when inserting. This isn't a hard
|
||||
* requirement, but a general rule of thumb in order to garner the
|
||||
* best multi-threaded performance out of the data structure.
|
||||
*/
|
||||
void
|
||||
multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
|
||||
multilist_sublist_index_func_t *index_func)
|
||||
{
|
||||
ASSERT3P(ml, !=, NULL);
|
||||
ASSERT3U(size, >, 0);
|
||||
ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
|
||||
ASSERT3U(num, >, 0);
|
||||
ASSERT3P(index_func, !=, NULL);
|
||||
|
||||
ml->ml_offset = offset;
|
||||
ml->ml_num_sublists = num;
|
||||
ml->ml_index_func = index_func;
|
||||
|
||||
ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
|
||||
ml->ml_num_sublists, KM_SLEEP);
|
||||
|
||||
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||
|
||||
for (int i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&mls->mls_list, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy the given multilist object, and free up any memory it holds.
|
||||
*/
|
||||
void
|
||||
multilist_destroy(multilist_t *ml)
|
||||
{
|
||||
ASSERT(multilist_is_empty(ml));
|
||||
|
||||
for (int i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
|
||||
ASSERT(list_is_empty(&mls->mls_list));
|
||||
|
||||
list_destroy(&mls->mls_list);
|
||||
mutex_destroy(&mls->mls_lock);
|
||||
}
|
||||
|
||||
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||
kmem_free(ml->ml_sublists,
|
||||
sizeof (multilist_sublist_t) * ml->ml_num_sublists);
|
||||
|
||||
ml->ml_num_sublists = 0;
|
||||
ml->ml_offset = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the given object into the multilist.
|
||||
*
|
||||
* This function will insert the object specified into the sublist
|
||||
* determined using the function given at multilist creation time.
|
||||
*
|
||||
* The sublist locks are automatically acquired if not already held, to
|
||||
* ensure consistency when inserting and removing from multiple threads.
|
||||
*/
|
||||
void
|
||||
multilist_insert(multilist_t *ml, void *obj)
|
||||
{
|
||||
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||
multilist_sublist_t *mls;
|
||||
boolean_t need_lock;
|
||||
|
||||
DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
|
||||
unsigned int, sublist_idx, void *, obj);
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
|
||||
/*
|
||||
* Note: Callers may already hold the sublist lock by calling
|
||||
* multilist_sublist_lock(). Here we rely on MUTEX_HELD()
|
||||
* returning TRUE if and only if the current thread holds the
|
||||
* lock. While it's a little ugly to make the lock recursive in
|
||||
* this way, it works and allows the calling code to be much
|
||||
* simpler -- otherwise it would have to pass around a flag
|
||||
* indicating that it already has the lock.
|
||||
*/
|
||||
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
|
||||
|
||||
multilist_sublist_insert_head(mls, obj);
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the given object from the multilist.
|
||||
*
|
||||
* This function will remove the object specified from the sublist
|
||||
* determined using the function given at multilist creation time.
|
||||
*
|
||||
* The necessary sublist locks are automatically acquired, to ensure
|
||||
* consistency when inserting and removing from multiple threads.
|
||||
*/
|
||||
void
|
||||
multilist_remove(multilist_t *ml, void *obj)
|
||||
{
|
||||
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||
multilist_sublist_t *mls;
|
||||
boolean_t need_lock;
|
||||
|
||||
DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
|
||||
unsigned int, sublist_idx, void *, obj);
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
/* See comment in multilist_insert(). */
|
||||
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
|
||||
|
||||
multilist_sublist_remove(mls, obj);
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this multilist object is empty.
|
||||
*
|
||||
* This will return TRUE if it finds all of the sublists of this
|
||||
* multilist to be empty, and FALSE otherwise. Each sublist lock will be
|
||||
* automatically acquired as necessary.
|
||||
*
|
||||
* If concurrent insertions and removals are occurring, the semantics
|
||||
* of this function become a little fuzzy. Instead of locking all
|
||||
* sublists for the entire call time of the function, each sublist is
|
||||
* only locked as it is individually checked for emptiness. Thus, it's
|
||||
* possible for this function to return TRUE with non-empty sublists at
|
||||
* the time the function returns. This would be due to another thread
|
||||
* inserting into a given sublist, after that specific sublist was checked
|
||||
* and deemed empty, but before all sublists have been checked.
|
||||
*/
|
||||
int
|
||||
multilist_is_empty(multilist_t *ml)
|
||||
{
|
||||
for (int i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
/* See comment in multilist_insert(). */
|
||||
boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
if (!list_is_empty(&mls->mls_list)) {
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
/* Return the number of sublists composing this multilist */
|
||||
unsigned int
|
||||
multilist_get_num_sublists(multilist_t *ml)
|
||||
{
|
||||
return (ml->ml_num_sublists);
|
||||
}
|
||||
|
||||
/* Return a randomly selected, valid sublist index for this multilist */
|
||||
unsigned int
|
||||
multilist_get_random_index(multilist_t *ml)
|
||||
{
|
||||
return (spa_get_random(ml->ml_num_sublists));
|
||||
}
|
||||
|
||||
/* Lock and return the sublist specified at the given index */
|
||||
multilist_sublist_t *
|
||||
multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
|
||||
{
|
||||
multilist_sublist_t *mls;
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
return (mls);
|
||||
}
|
||||
|
||||
void
|
||||
multilist_sublist_unlock(multilist_sublist_t *mls)
|
||||
{
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* We're allowing any object to be inserted into this specific sublist,
|
||||
* but this can lead to trouble if multilist_remove() is called to
|
||||
* remove this object. Specifically, if calling ml_index_func on this
|
||||
* object returns an index for a sublist different than what is passed as
|
||||
* a parameter here, any call to multilist_remove() with this newly
|
||||
* inserted object is undefined! (the call to multilist_remove() will
|
||||
* remove the object from a list that it isn't contained in)
|
||||
*/
|
||||
void
|
||||
multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_insert_head(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
/* please see comment above multilist_sublist_insert_head */
|
||||
void
|
||||
multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_insert_tail(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
/*
|
||||
* Move the object one element forward in the list.
|
||||
*
|
||||
* This function will move the given object forward in the list (towards
|
||||
* the head) by one object. So, in essence, it will swap its position in
|
||||
* the list with its "prev" pointer. If the given object is already at the
|
||||
* head of the list, it cannot be moved forward any more than it already
|
||||
* is, so no action is taken.
|
||||
*
|
||||
* NOTE: This function **must not** remove any object from the list other
|
||||
* than the object given as the parameter. This is relied upon in
|
||||
* arc_evict_state_impl().
|
||||
*/
|
||||
void
|
||||
multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
void *prev = list_prev(&mls->mls_list, obj);
|
||||
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
ASSERT(!list_is_empty(&mls->mls_list));
|
||||
|
||||
/* 'obj' must be at the head of the list, nothing to do */
|
||||
if (prev == NULL)
|
||||
return;
|
||||
|
||||
list_remove(&mls->mls_list, obj);
|
||||
list_insert_before(&mls->mls_list, prev, obj);
|
||||
}
|
||||
|
||||
void
|
||||
multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_remove(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_head(multilist_sublist_t *mls)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_head(&mls->mls_list));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_tail(multilist_sublist_t *mls)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_tail(&mls->mls_list));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_next(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_next(&mls->mls_list, obj));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_prev(&mls->mls_list, obj));
|
||||
}
|
||||
|
||||
void
|
||||
multilist_link_init(multilist_node_t *link)
|
||||
{
|
||||
list_link_init(link);
|
||||
}
|
||||
|
||||
int
|
||||
multilist_link_active(multilist_node_t *link)
|
||||
{
|
||||
return (list_link_active(link));
|
||||
}
|
@ -37,6 +37,12 @@ extern "C" {
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
/*
|
||||
* Used by arc_flush() to inform arc_evict_state() that it should evict
|
||||
* all available buffers from the arc state being passed in.
|
||||
*/
|
||||
#define	ARC_EVICT_ALL	(-1ULL)	/* all bits set; parenthesized so it expands as one operand */
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
|
||||
@ -154,7 +160,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
boolean_t arc_clear_callback(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa);
|
||||
void arc_flush(spa_t *spa, boolean_t retry);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
|
106
uts/common/fs/zfs/sys/multilist.h
Normal file
106
uts/common/fs/zfs/sys/multilist.h
Normal file
@ -0,0 +1,106 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_MULTILIST_H
|
||||
#define _SYS_MULTILIST_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef list_node_t multilist_node_t;
|
||||
typedef struct multilist multilist_t;
|
||||
typedef struct multilist_sublist multilist_sublist_t;
|
||||
typedef unsigned int multilist_sublist_index_func_t(multilist_t *, void *);
|
||||
|
||||
struct multilist_sublist {
|
||||
/*
|
||||
* The mutex used internally to implement thread safe insertions
|
||||
* and removals to this individual sublist. It can also be locked
|
||||
* by a consumer using multilist_sublist_{lock,unlock}, which is
|
||||
* useful if a consumer needs to traverse the list in a thread
|
||||
* safe manner.
|
||||
*/
|
||||
kmutex_t mls_lock;
|
||||
/*
|
||||
* The actual list object containing all objects in this sublist.
|
||||
*/
|
||||
list_t mls_list;
|
||||
/*
|
||||
* Pad to cache line (64 bytes), in an effort to try and prevent
|
||||
* cache line contention. NOTE(review): the 24-byte figure presumably assumes mls_lock and mls_list together occupy 40 bytes -- confirm per platform.
|
||||
*/
|
||||
uint8_t mls_pad[24];
|
||||
};
|
||||
|
||||
struct multilist {
|
||||
/*
|
||||
* Byte offset of the multilist_node_t within each object; it is added
|
||||
* to the void *object pointer to reach that object's node.
|
||||
*/
|
||||
size_t ml_offset;
|
||||
/*
|
||||
* The number of sublists used internally by this multilist.
|
||||
*/
|
||||
uint64_t ml_num_sublists;
|
||||
/*
|
||||
* Pointer to the array of ml_num_sublists sublist structures (allocated in multilist_create()).
|
||||
*/
|
||||
multilist_sublist_t *ml_sublists;
|
||||
/*
|
||||
* Pointer to function which determines the sublist to use
|
||||
* when inserting and removing objects from this multilist.
|
||||
* Please see the comment above multilist_create for details.
|
||||
*/
|
||||
multilist_sublist_index_func_t *ml_index_func;
|
||||
};
|
||||
|
||||
void multilist_destroy(multilist_t *);
|
||||
void multilist_create(multilist_t *, size_t, size_t, unsigned int,
|
||||
multilist_sublist_index_func_t *);
|
||||
|
||||
void multilist_insert(multilist_t *, void *);
|
||||
void multilist_remove(multilist_t *, void *);
|
||||
int multilist_is_empty(multilist_t *);
|
||||
|
||||
unsigned int multilist_get_num_sublists(multilist_t *);
|
||||
unsigned int multilist_get_random_index(multilist_t *);
|
||||
|
||||
multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
|
||||
void multilist_sublist_unlock(multilist_sublist_t *);
|
||||
|
||||
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
|
||||
void multilist_sublist_insert_tail(multilist_sublist_t *, void *);
|
||||
void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj);
|
||||
void multilist_sublist_remove(multilist_sublist_t *, void *);
|
||||
|
||||
void *multilist_sublist_head(multilist_sublist_t *);
|
||||
void *multilist_sublist_tail(multilist_sublist_t *);
|
||||
void *multilist_sublist_next(multilist_sublist_t *, void *);
|
||||
void *multilist_sublist_prev(multilist_sublist_t *, void *);
|
||||
|
||||
void multilist_link_init(multilist_node_t *);
|
||||
int multilist_link_active(multilist_node_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_MULTILIST_H */
|
@ -438,7 +438,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
|
||||
* fault injection isn't a performance critical path.
|
||||
*/
|
||||
if (flags & ZINJECT_FLUSH_ARC)
|
||||
arc_flush(NULL);
|
||||
/*
|
||||
* We must use FALSE to ensure arc_flush returns, since
|
||||
* we're not preventing concurrent ARC insertions.
|
||||
*/
|
||||
arc_flush(NULL, FALSE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user