Fixes for procfs files backed by linked lists

There are some issues with the way the seq_file interface is implemented
for kstats backed by linked lists (zfs_dbgmsgs and certain per-pool
debugging info):

* We don't account for the fact that seq_file sometimes visits a node
  multiple times, which results in missing messages when read through
  procfs.
* We don't keep separate state for each reader of a file, so concurrent
  readers will receive incorrect results.
* We don't account for the fact that entries may have been removed from
  the list between read syscalls, so reading from these files in procfs
  can cause the system to crash.

This change fixes these issues and adds procfs_list, a wrapper around a
linked list that abstracts away the details of implementing the
seq_file interface for the list and exposing its contents through
procfs.

Reviewed-by: Don Brady <don.brady@delphix.com>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed-by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: John Gallagher <john.gallagher@delphix.com>
External-issue: LX-1211
Closes #7819
John Gallagher, 2018-09-26 11:08:12 -07:00 (committed by Brian Behlendorf)
parent 3ed2fbcc1c
commit d12614521a
24 changed files with 1094 additions and 566 deletions
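Before the per-file changes, here is a minimal consumer sketch of the new
interface (hypothetical my_* names; the real declarations are the ones
added in include/spl/sys/procfs_list.h below). Entries embed a
procfs_list_node_t, are appended with procfs_list_add() while holding
pl_lock, and are only ever removed from the head of the list:

#include <sys/procfs_list.h>

typedef struct my_entry {
	uint64_t me_value;
	procfs_list_node_t me_node;	/* linkage + monotonically increasing id */
} my_entry_t;

static procfs_list_t my_list;

/* pl_show callback: prints one list entry per seq_file invocation */
static int
my_show(struct seq_file *f, void *p)
{
	my_entry_t *me = p;

	seq_printf(f, "%llu\n", (u_longlong_t)me->me_value);
	return (0);
}

static void
my_init(void)
{
	/* Creates /proc/spl/kstat/zfs/my_list; header and clear are optional */
	procfs_list_install("zfs", "my_list", &my_list,
	    my_show, NULL, NULL, offsetof(my_entry_t, me_node));
}

static void
my_log(uint64_t value)
{
	my_entry_t *me = kmem_zalloc(sizeof (*me), KM_SLEEP);

	me->me_value = value;
	mutex_enter(&my_list.pl_lock);
	procfs_list_add(&my_list, me);
	mutex_exit(&my_list.pl_lock);
}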

configure.ac

@ -283,7 +283,6 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/inheritance/Makefile
tests/zfs-tests/tests/functional/inuse/Makefile
tests/zfs-tests/tests/functional/io/Makefile
tests/zfs-tests/tests/functional/kstat/Makefile
tests/zfs-tests/tests/functional/large_files/Makefile
tests/zfs-tests/tests/functional/largest_pool/Makefile
tests/zfs-tests/tests/functional/link_count/Makefile
@ -301,6 +300,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/pool_checkpoint/Makefile
tests/zfs-tests/tests/functional/poolversion/Makefile
tests/zfs-tests/tests/functional/privilege/Makefile
tests/zfs-tests/tests/functional/procfs/Makefile
tests/zfs-tests/tests/functional/projectquota/Makefile
tests/zfs-tests/tests/functional/pyzfs/Makefile
tests/zfs-tests/tests/functional/quota/Makefile

include/spl/sys/Makefile.am

@ -28,6 +28,7 @@ KERNEL_H = \
$(top_srcdir)/include/spl/sys/param.h \
$(top_srcdir)/include/spl/sys/processor.h \
$(top_srcdir)/include/spl/sys/proc.h \
$(top_srcdir)/include/spl/sys/procfs_list.h \
$(top_srcdir)/include/spl/sys/random.h \
$(top_srcdir)/include/spl/sys/rwlock.h \
$(top_srcdir)/include/spl/sys/shrinker.h \

include/spl/sys/kstat.h

@ -98,30 +98,34 @@ typedef struct kstat_raw_ops {
void *(*addr)(kstat_t *ksp, loff_t index);
} kstat_raw_ops_t;
typedef struct kstat_proc_entry {
char kpe_name[KSTAT_STRLEN+1]; /* kstat name */
char kpe_module[KSTAT_STRLEN+1]; /* provider module name */
kstat_module_t *kpe_owner; /* kstat module linkage */
struct list_head kpe_list; /* kstat linkage */
struct proc_dir_entry *kpe_proc; /* procfs entry */
} kstat_proc_entry_t;
struct kstat_s {
int ks_magic; /* magic value */
kid_t ks_kid; /* unique kstat ID */
hrtime_t ks_crtime; /* creation time */
hrtime_t ks_snaptime; /* last access time */
char ks_module[KSTAT_STRLEN+1]; /* provider module name */
int ks_instance; /* provider module instance */
char ks_name[KSTAT_STRLEN+1]; /* kstat name */
char ks_class[KSTAT_STRLEN+1]; /* kstat class */
uchar_t ks_type; /* kstat data type */
uchar_t ks_flags; /* kstat flags */
void *ks_data; /* kstat type-specific data */
uint_t ks_ndata; /* # of data records */
size_t ks_data_size; /* size of kstat data section */
struct proc_dir_entry *ks_proc; /* proc linkage */
kstat_update_t *ks_update; /* dynamic updates */
void *ks_private; /* private data */
kmutex_t ks_private_lock; /* kstat private data lock */
kmutex_t *ks_lock; /* kstat data lock */
struct list_head ks_list; /* kstat linkage */
kstat_module_t *ks_owner; /* kstat module linkage */
kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */
char *ks_raw_buf; /* buf used for raw ops */
size_t ks_raw_bufsize; /* size of raw ops buffer */
kstat_proc_entry_t ks_proc; /* data for procfs entry */
};
typedef struct kstat_named_s {
@ -189,6 +193,12 @@ extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
const char *ks_name, const char *ks_class, uchar_t ks_type,
uint_t ks_ndata, uchar_t ks_flags);
extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep,
const char *module, const char *name);
extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep);
extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep,
const struct file_operations *file_ops, void *data);
extern void __kstat_install(kstat_t *ksp);
extern void __kstat_delete(kstat_t *ksp);
extern void kstat_waitq_enter(kstat_io_t *);

include/spl/sys/procfs_list.h

@ -0,0 +1,71 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#ifndef _SPL_PROCFS_LIST_H
#define _SPL_PROCFS_LIST_H
#include <sys/kstat.h>
#include <sys/mutex.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
typedef struct procfs_list procfs_list_t;
struct procfs_list {
/* Accessed only by user of a procfs_list */
void *pl_private;
/*
* Accessed both by user of a procfs_list and by procfs_list
* implementation
*/
kmutex_t pl_lock;
list_t pl_list;
/* Accessed only by procfs_list implementation */
uint64_t pl_next_id;
int (*pl_show)(struct seq_file *f, void *p);
int (*pl_show_header)(struct seq_file *f);
int (*pl_clear)(procfs_list_t *procfs_list);
size_t pl_node_offset;
kstat_proc_entry_t pl_kstat_entry;
};
typedef struct procfs_list_node {
list_node_t pln_link;
uint64_t pln_id;
} procfs_list_node_t;
void procfs_list_install(const char *module,
const char *name,
procfs_list_t *procfs_list,
int (*show)(struct seq_file *f, void *p),
int (*show_header)(struct seq_file *f),
int (*clear)(procfs_list_t *procfs_list),
size_t procfs_list_node_off);
void procfs_list_uninstall(procfs_list_t *procfs_list);
void procfs_list_destroy(procfs_list_t *procfs_list);
void procfs_list_add(procfs_list_t *procfs_list, void *p);
#endif /* _SPL_PROCFS_LIST_H */

include/sys/spa.h

@ -863,22 +863,27 @@ extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
/* Historical pool statistics */
typedef struct spa_stats_history {
typedef struct spa_history_kstat {
kmutex_t lock;
uint64_t count;
uint64_t size;
kstat_t *kstat;
void *private;
list_t list;
} spa_stats_history_t;
} spa_history_kstat_t;
typedef struct spa_history_list {
uint64_t size;
procfs_list_t procfs_list;
} spa_history_list_t;
typedef struct spa_stats {
spa_stats_history_t read_history;
spa_stats_history_t txg_history;
spa_stats_history_t tx_assign_histogram;
spa_stats_history_t io_history;
spa_stats_history_t mmp_history;
spa_stats_history_t state; /* pool state */
spa_history_list_t read_history;
spa_history_list_t txg_history;
spa_history_kstat_t tx_assign_histogram;
spa_history_kstat_t io_history;
spa_history_list_t mmp_history;
spa_history_kstat_t state; /* pool state */
} spa_stats_t;
typedef enum txg_state {
@ -911,7 +916,7 @@ extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs);
extern int spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id);
extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
hrtime_t duration);
extern void *spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
int error);

include/sys/zfs_context.h

@ -62,6 +62,7 @@
#include <sys/ctype.h>
#include <sys/disp.h>
#include <sys/trace.h>
#include <sys/procfs_list.h>
#include <linux/dcache_compat.h>
#include <linux/utsname_compat.h>
@ -351,6 +352,37 @@ extern void kstat_set_raw_ops(kstat_t *ksp,
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index));
/*
* procfs list manipulation
*/
struct seq_file { };
void seq_printf(struct seq_file *m, const char *fmt, ...);
typedef struct procfs_list {
void *pl_private;
kmutex_t pl_lock;
list_t pl_list;
uint64_t pl_next_id;
size_t pl_node_offset;
} procfs_list_t;
typedef struct procfs_list_node {
list_node_t pln_link;
uint64_t pln_id;
} procfs_list_node_t;
void procfs_list_install(const char *module,
const char *name,
procfs_list_t *procfs_list,
int (*show)(struct seq_file *f, void *p),
int (*show_header)(struct seq_file *f),
int (*clear)(procfs_list_t *procfs_list),
size_t procfs_list_node_off);
void procfs_list_uninstall(procfs_list_t *procfs_list);
void procfs_list_destroy(procfs_list_t *procfs_list);
void procfs_list_add(procfs_list_t *procfs_list, void *p);
/*
* Kernel memory
*/

include/sys/zfs_debug.h

@ -76,13 +76,6 @@ extern void __dprintf(const char *file, const char *func,
extern void zfs_panic_recover(const char *fmt, ...);
typedef struct zfs_dbgmsg {
list_node_t zdm_node;
time_t zdm_timestamp;
int zdm_size;
char zdm_msg[1]; /* variable length allocation */
} zfs_dbgmsg_t;
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);

lib/libzpool/kernel.c

@ -424,6 +424,57 @@ cv_broadcast(kcondvar_t *cv)
VERIFY0(pthread_cond_broadcast(cv));
}
/*
* =========================================================================
* procfs list
* =========================================================================
*/
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{}
void
procfs_list_install(const char *module,
const char *name,
procfs_list_t *procfs_list,
int (*show)(struct seq_file *f, void *p),
int (*show_header)(struct seq_file *f),
int (*clear)(procfs_list_t *procfs_list),
size_t procfs_list_node_off)
{
mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&procfs_list->pl_list,
procfs_list_node_off + sizeof (procfs_list_node_t),
procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
procfs_list->pl_next_id = 1;
procfs_list->pl_node_offset = procfs_list_node_off;
}
void
procfs_list_uninstall(procfs_list_t *procfs_list)
{}
void
procfs_list_destroy(procfs_list_t *procfs_list)
{
ASSERT(list_is_empty(&procfs_list->pl_list));
list_destroy(&procfs_list->pl_list);
mutex_destroy(&procfs_list->pl_lock);
}
#define NODE_ID(procfs_list, obj) \
(((procfs_list_node_t *)(((char *)obj) + \
(procfs_list)->pl_node_offset))->pln_id)
void
procfs_list_add(procfs_list_t *procfs_list, void *p)
{
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
list_insert_tail(&procfs_list->pl_list, p);
}
/*
* =========================================================================
* vnode operations

module/spl/Makefile.in

@ -18,6 +18,7 @@ $(MODULE)-objs += spl-kobj.o
$(MODULE)-objs += spl-kstat.o
$(MODULE)-objs += spl-mutex.o
$(MODULE)-objs += spl-proc.o
$(MODULE)-objs += spl-procfs-list.o
$(MODULE)-objs += spl-rwlock.o
$(MODULE)-objs += spl-taskq.o
$(MODULE)-objs += spl-thread.o

module/spl/spl-kstat.c

@ -530,6 +530,18 @@ __kstat_set_raw_ops(kstat_t *ksp,
}
EXPORT_SYMBOL(__kstat_set_raw_ops);
void
kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
const char *name)
{
kpep->kpe_owner = NULL;
kpep->kpe_proc = NULL;
INIT_LIST_HEAD(&kpep->kpe_list);
strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
}
EXPORT_SYMBOL(kstat_proc_entry_init);
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
@ -556,13 +568,10 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_magic = KS_MAGIC;
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
ksp->ks_lock = &ksp->ks_private_lock;
INIT_LIST_HEAD(&ksp->ks_list);
ksp->ks_crtime = gethrtime();
ksp->ks_snaptime = ksp->ks_crtime;
strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN);
ksp->ks_instance = ks_instance;
strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN);
strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
ksp->ks_type = ks_type;
ksp->ks_flags = ks_flags;
@ -573,6 +582,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_raw_ops.addr = NULL;
ksp->ks_raw_buf = NULL;
ksp->ks_raw_bufsize = 0;
kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
@ -614,14 +624,14 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
EXPORT_SYMBOL(__kstat_create);
static int
kstat_detect_collision(kstat_t *ksp)
kstat_detect_collision(kstat_proc_entry_t *kpep)
{
kstat_module_t *module;
kstat_t *tmp;
kstat_proc_entry_t *tmp;
char *parent;
char *cp;
parent = kmem_asprintf("%s", ksp->ks_module);
parent = kmem_asprintf("%s", kpep->kpe_module);
if ((cp = strrchr(parent, '/')) == NULL) {
strfree(parent);
@ -630,8 +640,8 @@ kstat_detect_collision(kstat_t *ksp)
cp[0] = '\0';
if ((module = kstat_find_module(parent)) != NULL) {
list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
if (strncmp(tmp->ks_name, cp+1, KSTAT_STRLEN) == 0) {
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
strfree(parent);
return (EEXIST);
}
@ -642,24 +652,30 @@ kstat_detect_collision(kstat_t *ksp)
return (0);
}
/*
* Add a file to the proc filesystem under the kstat namespace (i.e.
* /proc/spl/kstat/). The file need not necessarily be implemented as a
* kstat.
*/
void
__kstat_install(kstat_t *ksp)
kstat_proc_entry_install(kstat_proc_entry_t *kpep,
const struct file_operations *file_ops, void *data)
{
kstat_module_t *module;
kstat_t *tmp;
kstat_proc_entry_t *tmp;
ASSERT(ksp);
ASSERT(kpep);
mutex_enter(&kstat_module_lock);
module = kstat_find_module(ksp->ks_module);
module = kstat_find_module(kpep->kpe_module);
if (module == NULL) {
if (kstat_detect_collision(ksp) != 0) {
if (kstat_detect_collision(kpep) != 0) {
cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
" collision", ksp->ks_module, ksp->ks_name);
" collision", kpep->kpe_module, kpep->kpe_name);
goto out;
}
module = kstat_create_module(ksp->ks_module);
module = kstat_create_module(kpep->kpe_module);
if (module == NULL)
goto out;
}
@ -668,44 +684,60 @@ __kstat_install(kstat_t *ksp)
* Only one entry by this name per-module, on failure the module
* shouldn't be deleted because we know it has at least one entry.
*/
list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
if (strncmp(tmp->ks_name, ksp->ks_name, KSTAT_STRLEN) == 0)
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
goto out;
}
list_add_tail(&ksp->ks_list, &module->ksm_kstat_list);
list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
mutex_enter(ksp->ks_lock);
ksp->ks_owner = module;
ksp->ks_proc = proc_create_data(ksp->ks_name, 0644,
module->ksm_proc, &proc_kstat_operations, (void *)ksp);
if (ksp->ks_proc == NULL) {
list_del_init(&ksp->ks_list);
kpep->kpe_owner = module;
kpep->kpe_proc = proc_create_data(kpep->kpe_name, 0644,
module->ksm_proc, file_ops, data);
if (kpep->kpe_proc == NULL) {
list_del_init(&kpep->kpe_list);
if (list_empty(&module->ksm_kstat_list))
kstat_delete_module(module);
}
mutex_exit(ksp->ks_lock);
out:
mutex_exit(&kstat_module_lock);
}
EXPORT_SYMBOL(kstat_proc_entry_install);
void
__kstat_install(kstat_t *ksp)
{
ASSERT(ksp);
kstat_proc_entry_install(&ksp->ks_proc, &proc_kstat_operations, ksp);
}
EXPORT_SYMBOL(__kstat_install);
void
__kstat_delete(kstat_t *ksp)
kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
{
kstat_module_t *module = ksp->ks_owner;
kstat_module_t *module = kpep->kpe_owner;
if (kpep->kpe_proc)
remove_proc_entry(kpep->kpe_name, module->ksm_proc);
mutex_enter(&kstat_module_lock);
list_del_init(&ksp->ks_list);
list_del_init(&kpep->kpe_list);
/*
* Remove top level module directory if it wasn't empty before, but now
* is.
*/
if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
kstat_delete_module(module);
mutex_exit(&kstat_module_lock);
if (ksp->ks_proc) {
remove_proc_entry(ksp->ks_name, module->ksm_proc);
}
EXPORT_SYMBOL(kstat_proc_entry_delete);
/* Remove top level module directory if it's empty */
if (list_empty(&module->ksm_kstat_list))
kstat_delete_module(module);
}
void
__kstat_delete(kstat_t *ksp)
{
kstat_proc_entry_delete(&ksp->ks_proc);
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
kmem_free(ksp->ks_data, ksp->ks_data_size);
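The comment above kstat_proc_entry_install() is the key refactoring point:
with the procfs plumbing factored out of kstat_t, arbitrary file_operations
can now be installed under /proc/spl/kstat/. A hedged sketch of a
hypothetical non-kstat consumer (my_* names and the "example" entry are
illustrative; single_open() is the stock kernel seq_file helper for
single-record files):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <sys/kstat.h>

static kstat_proc_entry_t my_entry;

static int
my_show(struct seq_file *f, void *p)
{
	seq_printf(f, "not a kstat\n");
	return (0);
}

static int
my_open(struct inode *inode, struct file *filp)
{
	return (single_open(filp, my_show, PDE_DATA(inode)));
}

static const struct file_operations my_fops = {
	.owner = THIS_MODULE,
	.open = my_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void
my_install(void)
{
	/* Appears as /proc/spl/kstat/zfs/example without creating a kstat */
	kstat_proc_entry_init(&my_entry, "zfs", "example");
	kstat_proc_entry_install(&my_entry, &my_fops, NULL);
}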

module/spl/spl-procfs-list.c

@ -0,0 +1,256 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#include <sys/list.h>
#include <sys/mutex.h>
#include <sys/procfs_list.h>
#include <linux/proc_fs.h>
/*
* A procfs_list is a wrapper around a linked list which implements the seq_file
* interface, allowing the contents of the list to be exposed through procfs.
* The kernel already has some utilities to help implement the seq_file
* interface for linked lists (seq_list_*), but they aren't appropriate for use
* with lists that have many entries, because seq_list_start walks the list at
* the start of each read syscall to find where it left off, so reading a file
* ends up being quadratic in the number of entries in the list.
*
* This implementation avoids this penalty by maintaining a separate cursor into
* the list per instance of the file that is open. It also maintains some extra
* information in each node of the list to prevent reads of entries that have
* been dropped from the list.
*
* Callers should only add elements to the list using procfs_list_add, which
* adds an element to the tail of the list. Other operations can be performed
* directly on the wrapped list using the normal list manipulation functions,
* but elements should only be removed from the head of the list.
*/
#define NODE_ID(procfs_list, obj) \
(((procfs_list_node_t *)(((char *)obj) + \
(procfs_list)->pl_node_offset))->pln_id)
typedef struct procfs_list_cursor {
procfs_list_t *procfs_list; /* List into which this cursor points */
void *cached_node; /* Most recently accessed node */
loff_t cached_pos; /* Position of cached_node */
} procfs_list_cursor_t;
static int
procfs_list_seq_show(struct seq_file *f, void *p)
{
procfs_list_cursor_t *cursor = f->private;
procfs_list_t *procfs_list = cursor->procfs_list;
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
if (p == SEQ_START_TOKEN) {
if (procfs_list->pl_show_header != NULL)
return (procfs_list->pl_show_header(f));
else
return (0);
}
return (procfs_list->pl_show(f, p));
}
static void *
procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
{
void *next_node;
procfs_list_t *procfs_list = cursor->procfs_list;
if (cursor->cached_node == SEQ_START_TOKEN)
next_node = list_head(&procfs_list->pl_list);
else
next_node = list_next(&procfs_list->pl_list,
cursor->cached_node);
if (next_node != NULL) {
cursor->cached_node = next_node;
cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node);
*pos = cursor->cached_pos;
}
return (next_node);
}
static void *
procfs_list_seq_start(struct seq_file *f, loff_t *pos)
{
procfs_list_cursor_t *cursor = f->private;
procfs_list_t *procfs_list = cursor->procfs_list;
mutex_enter(&procfs_list->pl_lock);
if (*pos == 0) {
cursor->cached_node = SEQ_START_TOKEN;
cursor->cached_pos = 0;
return (SEQ_START_TOKEN);
}
/*
* Check if our cached pointer has become stale, which happens if the
* message where we left off has been dropped from the list since
* the last read syscall completed.
*/
void *oldest_node = list_head(&procfs_list->pl_list);
if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL ||
NODE_ID(procfs_list, oldest_node) > cursor->cached_pos))
return (ERR_PTR(-EIO));
/*
* If it isn't starting from the beginning of the file, the seq_file
* code will either pick up at the same position it visited last or the
* following one.
*/
if (*pos == cursor->cached_pos) {
return (cursor->cached_node);
} else {
ASSERT3U(*pos, ==, cursor->cached_pos + 1);
return (procfs_list_next_node(cursor, pos));
}
}
static void *
procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
procfs_list_cursor_t *cursor = f->private;
ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
return (procfs_list_next_node(cursor, pos));
}
static void
procfs_list_seq_stop(struct seq_file *f, void *p)
{
procfs_list_cursor_t *cursor = f->private;
procfs_list_t *procfs_list = cursor->procfs_list;
mutex_exit(&procfs_list->pl_lock);
}
static struct seq_operations procfs_list_seq_ops = {
.show = procfs_list_seq_show,
.start = procfs_list_seq_start,
.next = procfs_list_seq_next,
.stop = procfs_list_seq_stop,
};
static int
procfs_list_open(struct inode *inode, struct file *filp)
{
int rc = seq_open_private(filp, &procfs_list_seq_ops,
sizeof (procfs_list_cursor_t));
if (rc != 0)
return (rc);
struct seq_file *f = filp->private_data;
procfs_list_cursor_t *cursor = f->private;
cursor->procfs_list = PDE_DATA(inode);
cursor->cached_node = NULL;
cursor->cached_pos = 0;
return (0);
}
static ssize_t
procfs_list_write(struct file *filp, const char __user *buf, size_t len,
loff_t *ppos)
{
struct seq_file *f = filp->private_data;
procfs_list_cursor_t *cursor = f->private;
procfs_list_t *procfs_list = cursor->procfs_list;
int rc;
if (procfs_list->pl_clear != NULL &&
(rc = procfs_list->pl_clear(procfs_list)) != 0)
return (-rc);
return (len);
}
static struct file_operations procfs_list_operations = {
.owner = THIS_MODULE,
.open = procfs_list_open,
.write = procfs_list_write,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
/*
* Initialize a procfs_list and create a file for it in the proc filesystem
* under the kstat namespace.
*/
void
procfs_list_install(const char *module,
const char *name,
procfs_list_t *procfs_list,
int (*show)(struct seq_file *f, void *p),
int (*show_header)(struct seq_file *f),
int (*clear)(procfs_list_t *procfs_list),
size_t procfs_list_node_off)
{
mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&procfs_list->pl_list,
procfs_list_node_off + sizeof (procfs_list_node_t),
procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */
procfs_list->pl_show = show;
procfs_list->pl_show_header = show_header;
procfs_list->pl_clear = clear;
procfs_list->pl_node_offset = procfs_list_node_off;
kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
kstat_proc_entry_install(&procfs_list->pl_kstat_entry,
&procfs_list_operations, procfs_list);
}
EXPORT_SYMBOL(procfs_list_install);
/* Remove the proc filesystem file corresponding to the given list */
void
procfs_list_uninstall(procfs_list_t *procfs_list)
{
kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
}
EXPORT_SYMBOL(procfs_list_uninstall);
void
procfs_list_destroy(procfs_list_t *procfs_list)
{
ASSERT(list_is_empty(&procfs_list->pl_list));
list_destroy(&procfs_list->pl_list);
mutex_destroy(&procfs_list->pl_lock);
}
EXPORT_SYMBOL(procfs_list_destroy);
/*
* Add a new node to the tail of the list. While the standard list manipulation
* functions can be used for all other operations, adding elements to the list
* should only be done using this helper so that the id of the new node is set
* correctly.
*/
void
procfs_list_add(procfs_list_t *procfs_list, void *p)
{
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
list_insert_tail(&procfs_list->pl_list, p);
}
EXPORT_SYMBOL(procfs_list_add);
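To make the cursor rules above concrete, here is a small userspace model
(illustrative only, not part of the commit) of the two cases that
procfs_list_seq_start() must handle: seq_file re-requesting the position it
already visited when its output did not fit in the previous read buffer,
and the stale-cursor check after head entries have been dropped:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t cached_pos = 8;	/* reader last showed the node with id 8 */
	uint64_t oldest_id = 9;		/* ids 1..8 since dropped from the head */
	uint64_t pos;

	/*
	 * Case 1: seq_file revisits the same position (*pos == cached_pos),
	 * so the cached node is returned again and no entry is skipped.
	 */
	pos = 8;
	if (pos == cached_pos)
		printf("revisit: show cached node again\n");

	/*
	 * Case 2: resuming at the next position, but the oldest surviving
	 * id has moved past cached_pos, so the cached node may point to
	 * freed memory; the real code returns ERR_PTR(-EIO) here.
	 */
	pos = 9;
	if (oldest_id > cached_pos)
		printf("stale cursor: fail the read with EIO\n");
	return (0);
}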

module/zfs/spa_stats.c

@ -55,7 +55,6 @@ int zfs_multihost_history = 0;
* Read statistics - Information exported regarding each arc_read call
*/
typedef struct spa_read_history {
uint64_t uid; /* unique identifier */
hrtime_t start; /* time read completed */
uint64_t objset; /* read from this objset */
uint64_t object; /* read of this object number */
@ -65,13 +64,13 @@ typedef struct spa_read_history {
uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
pid_t pid; /* PID of task doing read */
char comm[16]; /* process name of task doing read */
list_node_t srh_link;
procfs_list_node_t srh_node;
} spa_read_history_t;
static int
spa_read_history_headers(char *buf, size_t size)
spa_read_history_show_header(struct seq_file *f)
{
(void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
"%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
"level", "blkid", "aflags", "origin", "pid", "process");
@ -79,13 +78,13 @@ spa_read_history_headers(char *buf, size_t size)
}
static int
spa_read_history_data(char *buf, size_t size, void *data)
spa_read_history_show(struct seq_file *f, void *data)
{
spa_read_history_t *srh = (spa_read_history_t *)data;
(void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
seq_printf(f, "%-8llu %-16llu 0x%-6llx "
"%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
(u_longlong_t)srh->uid, srh->start,
(u_longlong_t)srh->srh_node.pln_id, srh->start,
(longlong_t)srh->objset, (longlong_t)srh->object,
(longlong_t)srh->level, (longlong_t)srh->blkid,
srh->aflags, srh->origin, srh->pid, srh->comm);
@ -93,120 +92,73 @@ spa_read_history_data(char *buf, size_t size, void *data)
return (0);
}
/*
* Calculate the address for the next spa_stats_history_t entry. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static void *
spa_read_history_addr(kstat_t *ksp, loff_t n)
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.read_history;
ASSERT(MUTEX_HELD(&ssh->lock));
if (n == 0)
ssh->private = list_tail(&ssh->list);
else if (ssh->private)
ssh->private = list_prev(&ssh->list, ssh->private);
return (ssh->private);
}
/*
* When the kstat is written discard all spa_read_history_t entries. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static int
spa_read_history_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.read_history;
if (rw == KSTAT_WRITE) {
spa_read_history_t *srh;
while ((srh = list_remove_head(&ssh->list))) {
ssh->size--;
kmem_free(srh, sizeof (spa_read_history_t));
}
ASSERT3U(ssh->size, ==, 0);
spa_read_history_t *srh;
while (shl->size > size) {
srh = list_remove_head(&shl->procfs_list.pl_list);
ASSERT3P(srh, !=, NULL);
kmem_free(srh, sizeof (spa_read_history_t));
shl->size--;
}
ksp->ks_ndata = ssh->size;
ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
if (size == 0)
ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_read_history_clear(procfs_list_t *procfs_list)
{
spa_history_list_t *shl = procfs_list->pl_private;
mutex_enter(&procfs_list->pl_lock);
spa_read_history_truncate(shl, 0);
mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_read_history_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.read_history;
char *name;
kstat_t *ksp;
spa_history_list_t *shl = &spa->spa_stats.read_history;
char *module;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&ssh->list, sizeof (spa_read_history_t),
offsetof(spa_read_history_t, srh_link));
shl->size = 0;
ssh->count = 0;
ssh->size = 0;
ssh->private = NULL;
module = kmem_asprintf("zfs/%s", spa_name(spa));
name = kmem_asprintf("zfs/%s", spa_name(spa));
shl->procfs_list.pl_private = shl;
procfs_list_install(module,
"reads",
&shl->procfs_list,
spa_read_history_show,
spa_read_history_show_header,
spa_read_history_clear,
offsetof(spa_read_history_t, srh_node));
ksp = kstat_create(name, 0, "reads", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
ssh->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_update = spa_read_history_update;
kstat_set_raw_ops(ksp, spa_read_history_headers,
spa_read_history_data, spa_read_history_addr);
kstat_install(ksp);
}
strfree(name);
strfree(module);
}
static void
spa_read_history_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.read_history;
spa_read_history_t *srh;
kstat_t *ksp;
ksp = ssh->kstat;
if (ksp)
kstat_delete(ksp);
mutex_enter(&ssh->lock);
while ((srh = list_remove_head(&ssh->list))) {
ssh->size--;
kmem_free(srh, sizeof (spa_read_history_t));
}
ASSERT3U(ssh->size, ==, 0);
list_destroy(&ssh->list);
mutex_exit(&ssh->lock);
mutex_destroy(&ssh->lock);
spa_history_list_t *shl = &spa->spa_stats.read_history;
procfs_list_uninstall(&shl->procfs_list);
spa_read_history_truncate(shl, 0);
procfs_list_destroy(&shl->procfs_list);
}
void
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
spa_stats_history_t *ssh = &spa->spa_stats.read_history;
spa_read_history_t *srh, *rm;
spa_history_list_t *shl = &spa->spa_stats.read_history;
spa_read_history_t *srh;
ASSERT3P(spa, !=, NULL);
ASSERT3P(zb, !=, NULL);
if (zfs_read_history == 0 && ssh->size == 0)
if (zfs_read_history == 0 && shl->size == 0)
return;
if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
@ -222,19 +174,14 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
srh->aflags = aflags;
srh->pid = getpid();
mutex_enter(&ssh->lock);
mutex_enter(&shl->procfs_list.pl_lock);
srh->uid = ssh->count++;
list_insert_head(&ssh->list, srh);
ssh->size++;
procfs_list_add(&shl->procfs_list, srh);
shl->size++;
while (ssh->size > zfs_read_history) {
ssh->size--;
rm = list_remove_tail(&ssh->list);
kmem_free(rm, sizeof (spa_read_history_t));
}
spa_read_history_truncate(shl, zfs_read_history);
mutex_exit(&ssh->lock);
mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@ -256,22 +203,21 @@ typedef struct spa_txg_history {
uint64_t writes; /* number of write operations */
uint64_t ndirty; /* number of dirty bytes */
hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
list_node_t sth_link;
procfs_list_node_t sth_node;
} spa_txg_history_t;
static int
spa_txg_history_headers(char *buf, size_t size)
spa_txg_history_show_header(struct seq_file *f)
{
(void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s "
seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
"%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
"ndirty", "nread", "nwritten", "reads", "writes",
"otime", "qtime", "wtime", "stime");
return (0);
}
static int
spa_txg_history_data(char *buf, size_t size, void *data)
spa_txg_history_show(struct seq_file *f, void *data)
{
spa_txg_history_t *sth = (spa_txg_history_t *)data;
uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
@ -303,7 +249,7 @@ spa_txg_history_data(char *buf, size_t size, void *data)
sync = sth->times[TXG_STATE_SYNCED] -
sth->times[TXG_STATE_WAIT_FOR_SYNC];
(void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
"%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
(longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
(u_longlong_t)sth->ndirty,
@ -315,110 +261,62 @@ spa_txg_history_data(char *buf, size_t size, void *data)
return (0);
}
/*
* Calculate the address for the next spa_stats_history_t entry. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static void *
spa_txg_history_addr(kstat_t *ksp, loff_t n)
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
ASSERT(MUTEX_HELD(&ssh->lock));
if (n == 0)
ssh->private = list_tail(&ssh->list);
else if (ssh->private)
ssh->private = list_prev(&ssh->list, ssh->private);
return (ssh->private);
}
/*
* When the kstat is written discard all spa_txg_history_t entries. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static int
spa_txg_history_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
ASSERT(MUTEX_HELD(&ssh->lock));
if (rw == KSTAT_WRITE) {
spa_txg_history_t *sth;
while ((sth = list_remove_head(&ssh->list))) {
ssh->size--;
kmem_free(sth, sizeof (spa_txg_history_t));
}
ASSERT3U(ssh->size, ==, 0);
spa_txg_history_t *sth;
while (shl->size > size) {
sth = list_remove_head(&shl->procfs_list.pl_list);
ASSERT3P(sth, !=, NULL);
kmem_free(sth, sizeof (spa_txg_history_t));
shl->size--;
}
ksp->ks_ndata = ssh->size;
ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
if (size == 0)
ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_txg_history_clear(procfs_list_t *procfs_list)
{
spa_history_list_t *shl = procfs_list->pl_private;
mutex_enter(&procfs_list->pl_lock);
spa_txg_history_truncate(shl, 0);
mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_txg_history_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
char *name;
kstat_t *ksp;
spa_history_list_t *shl = &spa->spa_stats.txg_history;
char *module;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&ssh->list, sizeof (spa_txg_history_t),
offsetof(spa_txg_history_t, sth_link));
shl->size = 0;
ssh->count = 0;
ssh->size = 0;
ssh->private = NULL;
module = kmem_asprintf("zfs/%s", spa_name(spa));
name = kmem_asprintf("zfs/%s", spa_name(spa));
shl->procfs_list.pl_private = shl;
procfs_list_install(module,
"txgs",
&shl->procfs_list,
spa_txg_history_show,
spa_txg_history_show_header,
spa_txg_history_clear,
offsetof(spa_txg_history_t, sth_node));
ksp = kstat_create(name, 0, "txgs", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
ssh->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_update = spa_txg_history_update;
kstat_set_raw_ops(ksp, spa_txg_history_headers,
spa_txg_history_data, spa_txg_history_addr);
kstat_install(ksp);
}
strfree(name);
strfree(module);
}
static void
spa_txg_history_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
kstat_t *ksp;
ksp = ssh->kstat;
if (ksp)
kstat_delete(ksp);
mutex_enter(&ssh->lock);
while ((sth = list_remove_head(&ssh->list))) {
ssh->size--;
kmem_free(sth, sizeof (spa_txg_history_t));
}
ASSERT3U(ssh->size, ==, 0);
list_destroy(&ssh->list);
mutex_exit(&ssh->lock);
mutex_destroy(&ssh->lock);
spa_history_list_t *shl = &spa->spa_stats.txg_history;
procfs_list_uninstall(&shl->procfs_list);
spa_txg_history_truncate(shl, 0);
procfs_list_destroy(&shl->procfs_list);
}
/*
@ -427,10 +325,10 @@ spa_txg_history_destroy(spa_t *spa)
void
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
{
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
spa_txg_history_t *sth, *rm;
spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
if (zfs_txg_history == 0 && ssh->size == 0)
if (zfs_txg_history == 0 && shl->size == 0)
return;
sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
@ -438,18 +336,11 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
sth->state = TXG_STATE_OPEN;
sth->times[TXG_STATE_BIRTH] = birth_time;
mutex_enter(&ssh->lock);
list_insert_head(&ssh->list, sth);
ssh->size++;
while (ssh->size > zfs_txg_history) {
ssh->size--;
rm = list_remove_tail(&ssh->list);
kmem_free(rm, sizeof (spa_txg_history_t));
}
mutex_exit(&ssh->lock);
mutex_enter(&shl->procfs_list.pl_lock);
procfs_list_add(&shl->procfs_list, sth);
shl->size++;
spa_txg_history_truncate(shl, zfs_txg_history);
mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@ -459,16 +350,16 @@ int
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
hrtime_t completed_time)
{
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
mutex_enter(&ssh->lock);
for (sth = list_head(&ssh->list); sth != NULL;
sth = list_next(&ssh->list, sth)) {
mutex_enter(&shl->procfs_list.pl_lock);
for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->times[completed_state] = completed_time;
sth->state++;
@ -476,7 +367,7 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
break;
}
}
mutex_exit(&ssh->lock);
mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@ -488,16 +379,16 @@ static int
spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
{
spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
mutex_enter(&ssh->lock);
for (sth = list_head(&ssh->list); sth != NULL;
sth = list_next(&ssh->list, sth)) {
mutex_enter(&shl->procfs_list.pl_lock);
for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->nread = nread;
sth->nwritten = nwritten;
@ -508,7 +399,7 @@ spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
break;
}
}
mutex_exit(&ssh->lock);
mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@ -580,16 +471,16 @@ static int
spa_tx_assign_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
int i;
if (rw == KSTAT_WRITE) {
for (i = 0; i < ssh->count; i++)
((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
for (i = 0; i < shk->count; i++)
((kstat_named_t *)shk->private)[i].value.ui64 = 0;
}
for (i = ssh->count; i > 0; i--)
if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
for (i = shk->count; i > 0; i--)
if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0)
break;
ksp->ks_ndata = i;
@ -601,22 +492,22 @@ spa_tx_assign_update(kstat_t *ksp, int rw)
static void
spa_tx_assign_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
char *name;
kstat_named_t *ks;
kstat_t *ksp;
int i;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
ssh->size = ssh->count * sizeof (kstat_named_t);
ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
shk->count = 42; /* power of two buckets for 1ns to 2,199s */
shk->size = shk->count * sizeof (kstat_named_t);
shk->private = kmem_alloc(shk->size, KM_SLEEP);
name = kmem_asprintf("zfs/%s", spa_name(spa));
for (i = 0; i < ssh->count; i++) {
ks = &((kstat_named_t *)ssh->private)[i];
for (i = 0; i < shk->count; i++) {
ks = &((kstat_named_t *)shk->private)[i];
ks->data_type = KSTAT_DATA_UINT64;
ks->value.ui64 = 0;
(void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
@ -625,13 +516,13 @@ spa_tx_assign_init(spa_t *spa)
ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
ssh->kstat = ksp;
shk->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_data = ssh->private;
ksp->ks_ndata = ssh->count;
ksp->ks_data_size = ssh->size;
ksp->ks_lock = &shk->lock;
ksp->ks_data = shk->private;
ksp->ks_ndata = shk->count;
ksp->ks_data_size = shk->size;
ksp->ks_private = spa;
ksp->ks_update = spa_tx_assign_update;
kstat_install(ksp);
@ -642,27 +533,27 @@ spa_tx_assign_init(spa_t *spa)
static void
spa_tx_assign_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
kstat_t *ksp;
ksp = ssh->kstat;
ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
kmem_free(ssh->private, ssh->size);
mutex_destroy(&ssh->lock);
kmem_free(shk->private, shk->size);
mutex_destroy(&shk->lock);
}
void
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
{
spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
uint64_t idx = 0;
while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
while (((1ULL << idx) < nsecs) && (idx < shk->size - 1))
idx++;
atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64);
}
/*
@ -682,19 +573,19 @@ spa_io_history_update(kstat_t *ksp, int rw)
static void
spa_io_history_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
char *name;
kstat_t *ksp;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
ssh->kstat = ksp;
shk->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_lock = &shk->lock;
ksp->ks_private = spa;
ksp->ks_update = spa_io_history_update;
kstat_install(ksp);
@ -705,12 +596,12 @@ spa_io_history_init(spa_t *spa)
static void
spa_io_history_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
if (ssh->kstat)
kstat_delete(ssh->kstat);
if (shk->kstat)
kstat_delete(shk->kstat);
mutex_destroy(&ssh->lock);
mutex_destroy(&shk->lock);
}
/*
@ -733,7 +624,7 @@ spa_io_history_destroy(spa_t *spa)
*/
typedef struct spa_mmp_history {
uint64_t mmp_kstat_id; /* unique # for updates */
uint64_t mmp_node_id; /* unique # for updates */
uint64_t txg; /* txg of last sync */
uint64_t timestamp; /* UTC time MMP write issued */
uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
@ -743,20 +634,20 @@ typedef struct spa_mmp_history {
int io_error; /* error status of MMP write */
hrtime_t error_start; /* hrtime of start of error period */
hrtime_t duration; /* time from submission to completion */
list_node_t smh_link;
procfs_list_node_t smh_node;
} spa_mmp_history_t;
static int
spa_mmp_history_headers(char *buf, size_t size)
spa_mmp_history_show_header(struct seq_file *f)
{
(void) snprintf(buf, size, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
"%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
"mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
return (0);
}
static int
spa_mmp_history_data(char *buf, size_t size, void *data)
spa_mmp_history_show(struct seq_file *f, void *data)
{
spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
@ -764,8 +655,8 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
"%-10lld %s\n";
(void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt),
(u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
(u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
(u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
(longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
(u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
@ -774,137 +665,86 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
return (0);
}
/*
* Calculate the address for the next spa_stats_history_t entry. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static void *
spa_mmp_history_addr(kstat_t *ksp, loff_t n)
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
ASSERT(MUTEX_HELD(&ssh->lock));
if (n == 0)
ssh->private = list_tail(&ssh->list);
else if (ssh->private)
ssh->private = list_prev(&ssh->list, ssh->private);
return (ssh->private);
}
/*
* When the kstat is written discard all spa_mmp_history_t entries. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static int
spa_mmp_history_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
ASSERT(MUTEX_HELD(&ssh->lock));
if (rw == KSTAT_WRITE) {
spa_mmp_history_t *smh;
while ((smh = list_remove_head(&ssh->list))) {
ssh->size--;
if (smh->vdev_path)
strfree(smh->vdev_path);
kmem_free(smh, sizeof (spa_mmp_history_t));
}
ASSERT3U(ssh->size, ==, 0);
spa_mmp_history_t *smh;
while (shl->size > size) {
smh = list_remove_head(&shl->procfs_list.pl_list);
if (smh->vdev_path)
strfree(smh->vdev_path);
kmem_free(smh, sizeof (spa_mmp_history_t));
shl->size--;
}
ksp->ks_ndata = ssh->size;
ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
if (size == 0)
ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_mmp_history_clear(procfs_list_t *procfs_list)
{
spa_history_list_t *shl = procfs_list->pl_private;
mutex_enter(&procfs_list->pl_lock);
spa_mmp_history_truncate(shl, 0);
mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_mmp_history_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
char *name;
kstat_t *ksp;
spa_history_list_t *shl = &spa->spa_stats.mmp_history;
char *module;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&ssh->list, sizeof (spa_mmp_history_t),
offsetof(spa_mmp_history_t, smh_link));
shl->size = 0;
ssh->count = 0;
ssh->size = 0;
ssh->private = NULL;
module = kmem_asprintf("zfs/%s", spa_name(spa));
name = kmem_asprintf("zfs/%s", spa_name(spa));
shl->procfs_list.pl_private = shl;
procfs_list_install(module,
"multihost",
&shl->procfs_list,
spa_mmp_history_show,
spa_mmp_history_show_header,
spa_mmp_history_clear,
offsetof(spa_mmp_history_t, smh_node));
ksp = kstat_create(name, 0, "multihost", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
ssh->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_update = spa_mmp_history_update;
kstat_set_raw_ops(ksp, spa_mmp_history_headers,
spa_mmp_history_data, spa_mmp_history_addr);
kstat_install(ksp);
}
strfree(name);
strfree(module);
}
static void
spa_mmp_history_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
kstat_t *ksp;
ksp = ssh->kstat;
if (ksp)
kstat_delete(ksp);
mutex_enter(&ssh->lock);
while ((smh = list_remove_head(&ssh->list))) {
ssh->size--;
if (smh->vdev_path)
strfree(smh->vdev_path);
kmem_free(smh, sizeof (spa_mmp_history_t));
}
ASSERT3U(ssh->size, ==, 0);
list_destroy(&ssh->list);
mutex_exit(&ssh->lock);
mutex_destroy(&ssh->lock);
spa_history_list_t *shl = &spa->spa_stats.mmp_history;
procfs_list_uninstall(&shl->procfs_list);
spa_mmp_history_truncate(shl, 0);
procfs_list_destroy(&shl->procfs_list);
}
/*
* Set duration in existing "skip" record to how long we have waited for a leaf
* vdev to become available.
*
* Important that we start search at the head of the list where new
* Important that we start search at the tail of the list where new
* records are inserted, so this is normally an O(1) operation.
*/
int
spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
{
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
if (zfs_multihost_history == 0 && ssh->size == 0)
if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
mutex_enter(&ssh->lock);
for (smh = list_head(&ssh->list); smh != NULL;
smh = list_next(&ssh->list, smh)) {
if (smh->mmp_kstat_id == mmp_kstat_id) {
mutex_enter(&shl->procfs_list.pl_lock);
for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
smh = list_prev(&shl->procfs_list.pl_list, smh)) {
if (smh->mmp_node_id == mmp_node_id) {
ASSERT3U(smh->io_error, !=, 0);
smh->duration = gethrtime() - smh->error_start;
smh->vdev_guid++;
@ -912,7 +752,7 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
break;
}
}
mutex_exit(&ssh->lock);
mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@ -922,20 +762,20 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
* See comment re: search order above spa_mmp_history_set_skip().
*/
int
spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
hrtime_t duration)
{
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
if (zfs_multihost_history == 0 && ssh->size == 0)
if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
mutex_enter(&ssh->lock);
for (smh = list_head(&ssh->list); smh != NULL;
smh = list_next(&ssh->list, smh)) {
if (smh->mmp_kstat_id == mmp_kstat_id) {
mutex_enter(&shl->procfs_list.pl_lock);
for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
smh = list_prev(&shl->procfs_list.pl_list, smh)) {
if (smh->mmp_node_id == mmp_node_id) {
ASSERT(smh->io_error == 0);
smh->io_error = io_error;
smh->duration = duration;
@ -943,7 +783,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
break;
}
}
mutex_exit(&ssh->lock);
mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@ -953,16 +793,16 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
* error == 0 : a write was issued.
* error != 0 : a write was not issued because no leaves were found.
*/
void *
void
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
int error)
{
spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh, *rm;
spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
if (zfs_multihost_history == 0 && ssh->size == 0)
return (NULL);
if (zfs_multihost_history == 0 && shl->size == 0)
return;
smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
smh->txg = txg;
@ -974,7 +814,7 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_path = strdup(vd->vdev_path);
}
smh->vdev_label = label;
smh->mmp_kstat_id = mmp_kstat_id;
smh->mmp_node_id = mmp_node_id;
if (error) {
smh->io_error = error;
@ -982,21 +822,11 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_guid = 1;
}
mutex_enter(&ssh->lock);
list_insert_head(&ssh->list, smh);
ssh->size++;
while (ssh->size > zfs_multihost_history) {
ssh->size--;
rm = list_remove_tail(&ssh->list);
if (rm->vdev_path)
strfree(rm->vdev_path);
kmem_free(rm, sizeof (spa_mmp_history_t));
}
mutex_exit(&ssh->lock);
return ((void *)smh);
mutex_enter(&shl->procfs_list.pl_lock);
procfs_list_add(&shl->procfs_list, smh);
shl->size++;
spa_mmp_history_truncate(shl, zfs_multihost_history);
mutex_exit(&shl->procfs_list.pl_lock);
}
static void *
@ -1023,19 +853,19 @@ spa_state_data(char *buf, size_t size, void *data)
static void
spa_state_init(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.state;
spa_history_kstat_t *shk = &spa->spa_stats.state;
char *name;
kstat_t *ksp;
mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "state", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
ssh->kstat = ksp;
shk->kstat = ksp;
if (ksp) {
ksp->ks_lock = &ssh->lock;
ksp->ks_lock = &shk->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
@ -1049,12 +879,12 @@ spa_state_init(spa_t *spa)
static void
spa_health_destroy(spa_t *spa)
{
spa_stats_history_t *ssh = &spa->spa_stats.state;
kstat_t *ksp = ssh->kstat;
spa_history_kstat_t *shk = &spa->spa_stats.state;
kstat_t *ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
mutex_destroy(&ssh->lock);
mutex_destroy(&shk->lock);
}
void

module/zfs/vdev_queue.c

@ -429,16 +429,16 @@ static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
if (ssh->kstat != NULL) {
mutex_enter(&ssh->lock);
kstat_waitq_enter(ssh->kstat->ks_data);
mutex_exit(&ssh->lock);
if (shk->kstat != NULL) {
mutex_enter(&shk->lock);
kstat_waitq_enter(shk->kstat->ks_data);
mutex_exit(&shk->lock);
}
}
@ -446,16 +446,16 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
if (ssh->kstat != NULL) {
mutex_enter(&ssh->lock);
kstat_waitq_exit(ssh->kstat->ks_data);
mutex_exit(&ssh->lock);
if (shk->kstat != NULL) {
mutex_enter(&shk->lock);
kstat_waitq_exit(shk->kstat->ks_data);
mutex_exit(&shk->lock);
}
}
@ -463,17 +463,17 @@ static void
vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active++;
avl_add(&vq->vq_active_tree, zio);
if (ssh->kstat != NULL) {
mutex_enter(&ssh->lock);
kstat_runq_enter(ssh->kstat->ks_data);
mutex_exit(&ssh->lock);
if (shk->kstat != NULL) {
mutex_enter(&shk->lock);
kstat_runq_enter(shk->kstat->ks_data);
mutex_exit(&shk->lock);
}
}
@ -481,17 +481,17 @@ static void
vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
spa_stats_history_t *ssh = &spa->spa_stats.io_history;
spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active--;
avl_remove(&vq->vq_active_tree, zio);
if (ssh->kstat != NULL) {
kstat_io_t *ksio = ssh->kstat->ks_data;
if (shk->kstat != NULL) {
kstat_io_t *ksio = shk->kstat->ks_data;
mutex_enter(&ssh->lock);
mutex_enter(&shk->lock);
kstat_runq_exit(ksio);
if (zio->io_type == ZIO_TYPE_READ) {
ksio->reads++;
@ -500,7 +500,7 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
ksio->writes++;
ksio->nwritten += zio->io_size;
}
mutex_exit(&ssh->lock);
mutex_exit(&shk->lock);
}
}

module/zfs/zfs_debug.c

@ -24,13 +24,17 @@
*/
#include <sys/zfs_context.h>
#include <sys/kstat.h>
list_t zfs_dbgmsgs;
typedef struct zfs_dbgmsg {
procfs_list_node_t zdm_node;
time_t zdm_timestamp;
int zdm_size;
char zdm_msg[1]; /* variable length allocation */
} zfs_dbgmsg_t;
procfs_list_t zfs_dbgmsgs;
int zfs_dbgmsg_size = 0;
kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
kstat_t *zfs_dbgmsg_kstat;
/*
* Internal ZFS debug messages are enabled by default.
@ -47,122 +51,70 @@ kstat_t *zfs_dbgmsg_kstat;
int zfs_dbgmsg_enable = 1;
static int
zfs_dbgmsg_headers(char *buf, size_t size)
zfs_dbgmsg_show_header(struct seq_file *f)
{
(void) snprintf(buf, size, "%-12s %-8s\n", "timestamp", "message");
seq_printf(f, "%-12s %-8s\n", "timestamp", "message");
return (0);
}
static int
zfs_dbgmsg_data(char *buf, size_t size, void *data)
zfs_dbgmsg_show(struct seq_file *f, void *p)
{
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
(void) snprintf(buf, size, "%-12llu %-s\n",
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p;
seq_printf(f, "%-12llu %-s\n",
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
return (0);
}
static void *
zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
{
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private;
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
if (n == 0)
ksp->ks_private = list_head(&zfs_dbgmsgs);
else if (zdm)
ksp->ks_private = list_next(&zfs_dbgmsgs, zdm);
return (ksp->ks_private);
}
static void
zfs_dbgmsg_purge(int max_size)
{
zfs_dbgmsg_t *zdm;
int size;
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
while (zfs_dbgmsg_size > max_size) {
zdm = list_remove_head(&zfs_dbgmsgs);
zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
if (zdm == NULL)
return;
size = zdm->zdm_size;
int size = zdm->zdm_size;
kmem_free(zdm, size);
zfs_dbgmsg_size -= size;
}
}
static int
zfs_dbgmsg_update(kstat_t *ksp, int rw)
zfs_dbgmsg_clear(procfs_list_t *procfs_list)
{
if (rw == KSTAT_WRITE)
zfs_dbgmsg_purge(0);
mutex_enter(&zfs_dbgmsgs.pl_lock);
zfs_dbgmsg_purge(0);
mutex_exit(&zfs_dbgmsgs.pl_lock);
return (0);
}
void
zfs_dbgmsg_init(void)
{
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
procfs_list_install("zfs",
"dbgmsg",
&zfs_dbgmsgs,
zfs_dbgmsg_show,
zfs_dbgmsg_show_header,
zfs_dbgmsg_clear,
offsetof(zfs_dbgmsg_t, zdm_node));
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
if (zfs_dbgmsg_kstat) {
zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
zfs_dbgmsg_kstat->ks_private = NULL;
zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
zfs_dbgmsg_data, zfs_dbgmsg_addr);
kstat_install(zfs_dbgmsg_kstat);
}
}
void
zfs_dbgmsg_fini(void)
{
if (zfs_dbgmsg_kstat)
kstat_delete(zfs_dbgmsg_kstat);
procfs_list_uninstall(&zfs_dbgmsgs);
zfs_dbgmsg_purge(0);
/*
* TODO - decide how to make this permanent
*/
#ifdef _KERNEL
mutex_enter(&zfs_dbgmsgs_lock);
zfs_dbgmsg_purge(0);
mutex_exit(&zfs_dbgmsgs_lock);
mutex_destroy(&zfs_dbgmsgs_lock);
procfs_list_destroy(&zfs_dbgmsgs);
#endif
}
void
__zfs_dbgmsg(char *buf)
{
zfs_dbgmsg_t *zdm;
int size;
size = sizeof (zfs_dbgmsg_t) + strlen(buf);
zdm = kmem_zalloc(size, KM_SLEEP);
zdm->zdm_size = size;
zdm->zdm_timestamp = gethrestime_sec();
strcpy(zdm->zdm_msg, buf);
mutex_enter(&zfs_dbgmsgs_lock);
list_insert_tail(&zfs_dbgmsgs, zdm);
zfs_dbgmsg_size += size;
zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
mutex_exit(&zfs_dbgmsgs_lock);
}
void
__set_error(const char *file, const char *func, int line, int err)
{
@ -176,6 +128,22 @@ __set_error(const char *file, const char *func, int line, int err)
}
#ifdef _KERNEL
static void
__zfs_dbgmsg(char *buf)
{
int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
zdm->zdm_size = size;
zdm->zdm_timestamp = gethrestime_sec();
strcpy(zdm->zdm_msg, buf);
mutex_enter(&zfs_dbgmsgs.pl_lock);
procfs_list_add(&zfs_dbgmsgs, zdm);
zfs_dbgmsg_size += size;
zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
mutex_exit(&zfs_dbgmsgs.pl_lock);
}
void
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
{
@ -244,14 +212,12 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
void
zfs_dbgmsg_print(const char *tag)
{
zfs_dbgmsg_t *zdm;
(void) printf("ZFS_DBGMSG(%s):\n", tag);
mutex_enter(&zfs_dbgmsgs_lock);
for (zdm = list_head(&zfs_dbgmsgs); zdm;
zdm = list_next(&zfs_dbgmsgs, zdm))
mutex_enter(&zfs_dbgmsgs.pl_lock);
for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL;
zdm = list_next(&zfs_dbgmsgs.pl_list, zdm))
(void) printf("%s\n", zdm->zdm_msg);
mutex_exit(&zfs_dbgmsgs_lock);
mutex_exit(&zfs_dbgmsgs.pl_lock);
}
#endif /* _KERNEL */
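
zfs_debug.c now goes through the new procfs_list wrapper for installation, insertion, clearing, and teardown. A minimal sketch of the API surface this file depends on, with types and signatures inferred purely from the call sites above (parameter names and the elided fields are assumptions; the authoritative declarations are in the procfs_list header this commit adds):

typedef struct procfs_list {
	list_t		pl_list;	/* the backing linked list */
	kmutex_t	pl_lock;	/* protects pl_list and reader state */
	/* per-reader cursor bookkeeping elided */
} procfs_list_t;

void procfs_list_install(const char *module, const char *name,
    procfs_list_t *procfs_list,
    int (*show)(struct seq_file *f, void *p),
    int (*show_header)(struct seq_file *f),
    int (*clear)(procfs_list_t *procfs_list),
    size_t procfs_list_node_off);
void procfs_list_uninstall(procfs_list_t *procfs_list);
void procfs_list_destroy(procfs_list_t *procfs_list);
void procfs_list_add(procfs_list_t *procfs_list, void *entry);

Note the division of labor visible above: __zfs_dbgmsg() takes pl_lock itself before calling procfs_list_add(), and zfs_dbgmsg_clear() does the same around the purge, so the wrapper can assume its callers serialize list mutation.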


@ -584,10 +584,6 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
post =
tags = ['functional', 'inuse']
[tests/functional/kstat]
tests = ['state']
tags = ['functional', 'kstat']
[tests/functional/large_files]
tests = ['large_files_001_pos', 'large_files_002_pos']
tags = ['functional', 'large_files']
@ -672,6 +668,11 @@ tags = ['functional', 'poolversion']
tests = ['privilege_001_pos', 'privilege_002_pos']
tags = ['functional', 'privilege']
[tests/functional/procfs]
tests = ['procfs_list_basic', 'procfs_list_concurrent_readers',
'procfs_list_stale_read', 'pool_state']
tags = ['functional', 'procfs']
[tests/functional/projectquota]
tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
'projectquota_001_pos', 'projectquota_002_pos', 'projectquota_003_pos',


@ -29,7 +29,6 @@ SUBDIRS = \
inheritance \
inuse \
io \
kstat \
large_files \
largest_pool \
libzfs \
@ -48,6 +47,7 @@ SUBDIRS = \
pool_names \
poolversion \
privilege \
procfs \
projectquota \
quota \
raidz \


@ -1,5 +0,0 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
state.ksh


@ -0,0 +1,8 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/procfs
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
procfs_list_basic.ksh \
procfs_list_concurrent_readers.ksh \
procfs_list_stale_read.ksh \
pool_state.ksh


@ -19,8 +19,9 @@
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib


@ -0,0 +1,95 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Test that we can read from and write to a file in procfs whose contents are
# backed by a linked list.
#
# STRATEGY:
# 1. Take some snapshots of a filesystem, which will cause some messages to be
# written to the zfs dbgmsgs.
# 2. Read the dbgmsgs via procfs and verify that the expected messages are
# present.
# 3. Write to the dbgmsgs file to clear the messages.
# 4. Read the dbgmsgs again, and make sure the messages are no longer present.
#
function cleanup
{
datasetexists $FS && log_must zfs destroy -r $FS
}
function count_snap_cmds
{
typeset expected_count=$1
count=$(grep "command: zfs snapshot $FS@testsnapshot" | wc -l)
log_must eval "[[ $count -eq $expected_count ]]"
}
typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
typeset -r FS=$TESTPOOL/fs
typeset snap_msgs
log_onexit cleanup
# Clear out old messages
echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
log_must zfs create $FS
for i in {1..20}; do
log_must zfs snapshot "$FS@testsnapshot$i"
done
log_must zpool sync $TESTPOOL
#
# Read the debug message file in small chunks to make sure that the read is
# split up into multiple syscalls. This tests that when a syscall begins we
# correctly pick up in the list of messages where the previous syscall left
# off. The size of the read can affect how many bytes the seq_file code has
# left in its internal buffer, which in turn can affect the relative pos that
# the seq_file code picks up at when the next read starts. Try a few
# different size reads to make sure we can handle each case.
#
# Check that the file has the right contents by grepping for some of the
# messages that we expect to be present.
#
for chunk_sz in {1,64,256,1024,4096}; do
dd if=$ZFS_DBGMSG bs=$chunk_sz | count_snap_cmds 20
done
# Clear out old messages and check that they really are gone
echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
cat $ZFS_DBGMSG | count_snap_cmds 0
#
# Even though we don't expect any messages in the file, reading should still
# succeed.
#
log_must cat $ZFS_DBGMSG
log_pass "Basic reading/writing of procfs file backed by linked list successful"


@ -0,0 +1,82 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Make sure that interleaving reads from different readers does not affect the
# results that are returned.
#
# STRATEGY:
# 1. Make sure a few debug messages have been logged.
# 2. Open the procfs file and start reading from it.
# 3. Open the file again, and read its entire contents.
# 4. Resume reading from the first instance.
# 5. Check that the contents read by the two instances are identical.
#
function cleanup
{
[[ -z $msgs1 ]] || log_must rm $msgs1
[[ -z $msgs2 ]] || log_must rm $msgs2
datasetexists $FS && log_must zfs destroy -r $FS
}
typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
typeset -r FS=$TESTPOOL/fs
typeset msgs1 msgs2
log_onexit cleanup
# Clear out old messages
echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
# Add some new messages
log_must zfs create $FS
for i in {1..20}; do
log_must zfs snapshot "$FS@testsnapshot$i"
done
log_must zpool sync $TESTPOOL
msgs1=$(mktemp) || log_fail
msgs2=$(mktemp) || log_fail
#
# Start reading file, pause and read it from another process, and then finish
# reading.
#
{ dd bs=512 count=4; cat $ZFS_DBGMSG >$msgs1; cat; } <$ZFS_DBGMSG >$msgs2
#
# Truncate the result of the read that completed second in case it picked up an
# extra message that was logged after the first read completed.
#
log_must truncate -s $(stat -c "%s" $msgs1) $msgs2
log_must diff $msgs1 $msgs2
log_pass "Concurrent readers receive identical results"
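
This only holds if each open file descriptor gets its own cursor instead of sharing position through the kstat, as the old ks_private scheme did. A minimal sketch of what such per-reader state could look like, as an assumption consistent with the behavior verified here rather than a quote of the new header:

typedef struct procfs_list_cursor {
	procfs_list_t	*procfs_list;	/* list being walked by this reader */
	void		*cached_node;	/* last node shown to this reader */
	loff_t		cached_pos;	/* seq_file position of cached_node */
} procfs_list_cursor_t;

Allocating one of these per open() means the second reader's full pass over the file cannot disturb the first reader's saved position.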


@ -0,0 +1,98 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Make sure errors caused by messages being dropped from the list backing the
# procfs file are handled gracefully.
#
# STRATEGY:
# 1. Make sure a few entries have been logged.
# 2. Open the procfs file and start reading from it.
# 3. Write to the file to cause its contents to be dropped.
# 4. Resume reading from the first instance, and check that the expected
# error is received.
# 5. Repeat steps 1-4, except instead of dropping all the messages by writing
# to the file, cause enough new messages to be written that the old messages
# are dropped.
#
function cleanup
{
echo $default_max_entries >$MAX_ENTRIES_PARAM || log_fail
}
function sync_n
{
for i in {1..$1}; do
log_must zpool sync $TESTPOOL
done
return 0
}
function do_test
{
typeset cmd=$1
# Clear out old entries
echo 0 >$TXG_HIST || log_fail
# Add some new entries
sync_n 20
# Confirm that there actually is something in the file.
[[ $(wc -l <$TXG_HIST) -ge 20 ]] || log_fail "expected more entries"
#
# Start reading file, pause and run a command that will cause the
# current offset into the file to become invalid, and then try to
# finish reading.
#
{
log_must dd bs=512 count=4 >/dev/null
log_must eval "$cmd"
cat 2>&1 >/dev/null | log_must grep "Input/output error"
} <$TXG_HIST
}
typeset -r TXG_HIST=/proc/spl/kstat/zfs/$TESTPOOL/txgs
typeset MAX_ENTRIES_PARAM=/sys/module/zfs/parameters/zfs_txg_history
typeset default_max_entries
log_onexit cleanup
default_max_entries=$(cat $MAX_ENTRIES_PARAM) || log_fail
echo 50 >$MAX_ENTRIES_PARAM || log_fail
# Clear all of the existing entries.
do_test "echo 0 >$TXG_HIST"
# Add enough new entries to the list that all of the old ones are dropped.
do_test "sync_n 60"
log_pass "Attempting to read dropped message returns expected error"
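
One plausible mechanism behind the EIO this test demands, stated as an assumption rather than a reading of the implementation: give every node an identifier that is never reused, have each reader's cursor remember the last identifier it was shown, and fail the resumed read when that identifier has been purged, rather than walking freed memory:

typedef struct procfs_list_node {
	list_node_t	pln_link;	/* linkage in pl_list */
	uint64_t	pln_id;		/* monotonically increasing, never reused */
} procfs_list_node_t;

Under that scheme, clearing the file (or logging enough new entries to push the old ones out) leaves the cursor holding an id that is no longer on the list, and the next read syscall can detect this and return EIO instead of crashing, which is exactly what both do_test invocations check.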


@ -19,16 +19,16 @@
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
if ! is_linux ; then
log_unsupported "/proc/spl/kstat/<pool>/health only supported on Linux"
log_unsupported "procfs is only used on Linux"
fi
default_mirror_setup $DISKS
log_pass