freebsd-nq/module/zfs/multilist.c
Olaf Faaland 448d7aaabc Identify locks flagged by lockdep
When running a kernel with CONFIG_LOCKDEP=y, lockdep reports possible
recursive locking in some cases and possible circular locking dependency
in others, within the SPL and ZFS modules.

This patch uses a mutex type defined in SPL, MUTEX_NOLOCKDEP, to mark
such mutexes when they are initialized.  This mutex type causes
attempts to take or release those locks to be wrapped in lockdep_off()
and lockdep_on() calls to silence the dependency checker and allow the
use of lock_stats to examine contention.

For RW locks, it uses an analogous lock type, RW_NOLOCKDEP.

The goal is that these locks are ultimately changed back to type
MUTEX_DEFAULT or RW_DEFAULT, after the locks are annotated to reflect
their relationship (e.g. z_name_lock below) or any real problem with the
lock dependencies are fixed.

Some of the affected locks are:

tc_open_lock:
=============
This is an array of locks, all with same name, which txg_quiesce must
take all of in order to move txg to next state.  All default to the same
lockdep class, and so to lockdep appears recursive.

zp->z_name_lock:
================
In zfs_rmdir,
        dzp = znode for the directory (input to zfs_dirent_lock)
        zp  = znode for the entry being removed (output of zfs_dirent_lock)

zfs_rmdir()->zfs_dirent_lock() takes z_name_lock in dzp
zfs_rmdir() takes z_name_lock in zp

Since both dzp and zp are type znode_t, the locks have the same default
class, and lockdep considers it a possible recursive lock attempt.

l->l_rwlock:
============
zap_expand_leaf() sometimes creates two new zap leaf structures, via
these call paths:

zap_deref_leaf()->zap_get_leaf_byblk()->zap_leaf_open()
zap_expand_leaf()->zap_create_leaf()->zap_expand_leaf()->zap_create_leaf()

Because both zap_leaf_open() and zap_create_leaf() initialize
l->l_rwlock in their (separate) leaf structures, the lockdep class is
the same, and the linux kernel believes these might both be the same
lock, and emits a possible recursive lock warning.

Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3895
2015-12-22 10:21:33 -08:00

376 lines
10 KiB
C

/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/multilist.h>
#include <sys/trace_multilist.h>
/* needed for spa_get_random() */
#include <sys/spa.h>
/*
* Given the object contained on the list, return a pointer to the
* object's multilist_node_t structure it contains.
*/
#ifdef DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
return ((multilist_node_t *)((char *)obj + ml->ml_offset));
}
#endif
/*
* Initialize a new mutlilist using the parameters specified.
*
* - 'size' denotes the size of the structure containing the
* multilist_node_t.
* - 'offset' denotes the byte offset of the mutlilist_node_t within
* the structure that contains it.
* - 'num' specifies the number of internal sublists to create.
* - 'index_func' is used to determine which sublist to insert into
* when the multilist_insert() function is called; as well as which
* sublist to remove from when multilist_remove() is called. The
* requirements this function must meet, are the following:
*
* - It must always return the same value when called on the same
* object (to ensure the object is removed from the list it was
* inserted into).
*
* - It must return a value in the range [0, number of sublists).
* The multilist_get_num_sublists() function may be used to
* determine the number of sublists in the multilist.
*
* Also, in order to reduce internal contention between the sublists
* during insertion and removal, this function should choose evenly
* between all available sublists when inserting. This isn't a hard
* requirement, but a general rule of thumb in order to garner the
* best multi-threaded performance out of the data structure.
*/
void
multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
multilist_sublist_index_func_t *index_func)
{
int i;
ASSERT3P(ml, !=, NULL);
ASSERT3U(size, >, 0);
ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
ASSERT3U(num, >, 0);
ASSERT3P(index_func, !=, NULL);
ml->ml_offset = offset;
ml->ml_num_sublists = num;
ml->ml_index_func = index_func;
ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
ml->ml_num_sublists, KM_SLEEP);
ASSERT3P(ml->ml_sublists, !=, NULL);
for (i = 0; i < ml->ml_num_sublists; i++) {
multilist_sublist_t *mls = &ml->ml_sublists[i];
mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
list_create(&mls->mls_list, size, offset);
}
}
/*
* Destroy the given multilist object, and free up any memory it holds.
*/
void
multilist_destroy(multilist_t *ml)
{
int i;
ASSERT(multilist_is_empty(ml));
for (i = 0; i < ml->ml_num_sublists; i++) {
multilist_sublist_t *mls = &ml->ml_sublists[i];
ASSERT(list_is_empty(&mls->mls_list));
list_destroy(&mls->mls_list);
mutex_destroy(&mls->mls_lock);
}
ASSERT3P(ml->ml_sublists, !=, NULL);
kmem_free(ml->ml_sublists,
sizeof (multilist_sublist_t) * ml->ml_num_sublists);
ml->ml_num_sublists = 0;
ml->ml_offset = 0;
}
/*
* Insert the given object into the multilist.
*
* This function will insert the object specified into the sublist
* determined using the function given at multilist creation time.
*
* The sublist locks are automatically acquired if not already held, to
* ensure consistency when inserting and removing from multiple threads.
*/
void
multilist_insert(multilist_t *ml, void *obj)
{
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
multilist_sublist_t *mls;
boolean_t need_lock;
DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
unsigned int, sublist_idx, void *, obj);
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
mls = &ml->ml_sublists[sublist_idx];
/*
* Note: Callers may already hold the sublist lock by calling
* multilist_sublist_lock(). Here we rely on MUTEX_HELD()
* returning TRUE if and only if the current thread holds the
* lock. While it's a little ugly to make the lock recursive in
* this way, it works and allows the calling code to be much
* simpler -- otherwise it would have to pass around a flag
* indicating that it already has the lock.
*/
need_lock = !MUTEX_HELD(&mls->mls_lock);
if (need_lock)
mutex_enter(&mls->mls_lock);
ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
multilist_sublist_insert_head(mls, obj);
if (need_lock)
mutex_exit(&mls->mls_lock);
}
/*
* Remove the given object from the multilist.
*
* This function will remove the object specified from the sublist
* determined using the function given at multilist creation time.
*
* The necessary sublist locks are automatically acquired, to ensure
* consistency when inserting and removing from multiple threads.
*/
void
multilist_remove(multilist_t *ml, void *obj)
{
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
multilist_sublist_t *mls;
boolean_t need_lock;
DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
unsigned int, sublist_idx, void *, obj);
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
mls = &ml->ml_sublists[sublist_idx];
/* See comment in multilist_insert(). */
need_lock = !MUTEX_HELD(&mls->mls_lock);
if (need_lock)
mutex_enter(&mls->mls_lock);
ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
multilist_sublist_remove(mls, obj);
if (need_lock)
mutex_exit(&mls->mls_lock);
}
/*
* Check to see if this multilist object is empty.
*
* This will return TRUE if it finds all of the sublists of this
* multilist to be empty, and FALSE otherwise. Each sublist lock will be
* automatically acquired as necessary.
*
* If concurrent insertions and removals are occurring, the semantics
* of this function become a little fuzzy. Instead of locking all
* sublists for the entire call time of the function, each sublist is
* only locked as it is individually checked for emptiness. Thus, it's
* possible for this function to return TRUE with non-empty sublists at
* the time the function returns. This would be due to another thread
* inserting into a given sublist, after that specific sublist was check
* and deemed empty, but before all sublists have been checked.
*/
int
multilist_is_empty(multilist_t *ml)
{
int i;
for (i = 0; i < ml->ml_num_sublists; i++) {
multilist_sublist_t *mls = &ml->ml_sublists[i];
/* See comment in multilist_insert(). */
boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
if (need_lock)
mutex_enter(&mls->mls_lock);
if (!list_is_empty(&mls->mls_list)) {
if (need_lock)
mutex_exit(&mls->mls_lock);
return (FALSE);
}
if (need_lock)
mutex_exit(&mls->mls_lock);
}
return (TRUE);
}
/* Return the number of sublists composing this multilist */
unsigned int
multilist_get_num_sublists(multilist_t *ml)
{
return (ml->ml_num_sublists);
}
/* Return a randomly selected, valid sublist index for this multilist */
unsigned int
multilist_get_random_index(multilist_t *ml)
{
return (spa_get_random(ml->ml_num_sublists));
}
/* Lock and return the sublist specified at the given index */
multilist_sublist_t *
multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
{
multilist_sublist_t *mls;
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
mls = &ml->ml_sublists[sublist_idx];
mutex_enter(&mls->mls_lock);
return (mls);
}
void
multilist_sublist_unlock(multilist_sublist_t *mls)
{
mutex_exit(&mls->mls_lock);
}
/*
* We're allowing any object to be inserted into this specific sublist,
* but this can lead to trouble if multilist_remove() is called to
* remove this object. Specifically, if calling ml_index_func on this
* object returns an index for sublist different than what is passed as
* a parameter here, any call to multilist_remove() with this newly
* inserted object is undefined! (the call to multilist_remove() will
* remove the object from a list that it isn't contained in)
*/
void
multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
list_insert_head(&mls->mls_list, obj);
}
/* please see comment above multilist_sublist_insert_head */
void
multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
list_insert_tail(&mls->mls_list, obj);
}
/*
* Move the object one element forward in the list.
*
* This function will move the given object forward in the list (towards
* the head) by one object. So, in essence, it will swap its position in
* the list with its "prev" pointer. If the given object is already at the
* head of the list, it cannot be moved forward any more than it already
* is, so no action is taken.
*
* NOTE: This function **must not** remove any object from the list other
* than the object given as the parameter. This is relied upon in
* arc_evict_state_impl().
*/
void
multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
{
void *prev = list_prev(&mls->mls_list, obj);
ASSERT(MUTEX_HELD(&mls->mls_lock));
ASSERT(!list_is_empty(&mls->mls_list));
/* 'obj' must be at the head of the list, nothing to do */
if (prev == NULL)
return;
list_remove(&mls->mls_list, obj);
list_insert_before(&mls->mls_list, prev, obj);
}
void
multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
list_remove(&mls->mls_list, obj);
}
void *
multilist_sublist_head(multilist_sublist_t *mls)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
return (list_head(&mls->mls_list));
}
void *
multilist_sublist_tail(multilist_sublist_t *mls)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
return (list_tail(&mls->mls_list));
}
void *
multilist_sublist_next(multilist_sublist_t *mls, void *obj)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
return (list_next(&mls->mls_list, obj));
}
void *
multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
{
ASSERT(MUTEX_HELD(&mls->mls_lock));
return (list_prev(&mls->mls_list, obj));
}
void
multilist_link_init(multilist_node_t *link)
{
list_link_init(link);
}
int
multilist_link_active(multilist_node_t *link)
{
return (list_link_active(link));
}