Illumos #734: Use taskq_dispatch_ent() interface
It has been observed that some of the hottest locks are those of the zio taskqs. Contention on these locks can limit the rate at which zios are dispatched which limits performance. This upstream change from Illumos uses new interface to the taskqs which allow them to utilize a prealloc'ed taskq_ent_t. This removes the need to perform an allocation at dispatch time while holding the contended lock. This has the effect of improving system performance. Reviewed by: Albert Lee <trisk@nexenta.com> Reviewed by: Richard Lowe <richlowe@richlowe.net> Reviewed by: Alexey Zaytsev <alexey.zaytsev@nexenta.com> Reviewed by: Jason Brian King <jason.brian.king@gmail.com> Reviewed by: George Wilson <gwilson@zfsmail.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Approved by: Gordon Ross <gwr@nexenta.com> References to Illumos issue: https://www.illumos.org/issues/734 Ported-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #482
This commit is contained in:
parent
30a9524e45
commit
a38718a63d
@ -22,6 +22,9 @@
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
@ -365,6 +368,16 @@ typedef struct taskq taskq_t;
|
||||
typedef uintptr_t taskqid_t;
|
||||
typedef void (task_func_t)(void *);
|
||||
|
||||
typedef struct taskq_ent {
|
||||
struct taskq_ent *tqent_next;
|
||||
struct taskq_ent *tqent_prev;
|
||||
task_func_t *tqent_func;
|
||||
void *tqent_arg;
|
||||
uintptr_t tqent_flags;
|
||||
} taskq_ent_t;
|
||||
|
||||
#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */
|
||||
|
||||
#define TASKQ_PREPOPULATE 0x0001
|
||||
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
|
||||
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
|
||||
@ -385,6 +398,10 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
||||
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
|
||||
(taskq_create(a, b, maxclsyspri, d, e, f))
|
||||
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
|
||||
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
|
||||
taskq_ent_t *);
|
||||
extern int taskq_empty_ent(taskq_ent_t *);
|
||||
extern void taskq_init_ent(taskq_ent_t *);
|
||||
extern void taskq_destroy(taskq_t *);
|
||||
extern void taskq_wait(taskq_t *);
|
||||
extern int taskq_member(taskq_t *, kthread_t *);
|
||||
|
@ -22,6 +22,9 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_H
|
||||
#define _ZIO_H
|
||||
@ -423,6 +426,9 @@ struct zio {
|
||||
/* FMA state */
|
||||
zio_cksum_report_t *io_cksum_report;
|
||||
uint64_t io_ena;
|
||||
|
||||
/* Taskq dispatching state */
|
||||
taskq_ent_t io_tqent;
|
||||
};
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
|
||||
|
@ -22,19 +22,15 @@
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
int taskq_now;
|
||||
taskq_t *system_taskq;
|
||||
|
||||
typedef struct task {
|
||||
struct task *task_next;
|
||||
struct task *task_prev;
|
||||
task_func_t *task_func;
|
||||
void *task_arg;
|
||||
} task_t;
|
||||
|
||||
#define TASKQ_ACTIVE 0x00010000
|
||||
|
||||
struct taskq {
|
||||
@ -51,18 +47,19 @@ struct taskq {
|
||||
int tq_maxalloc;
|
||||
kcondvar_t tq_maxalloc_cv;
|
||||
int tq_maxalloc_wait;
|
||||
task_t *tq_freelist;
|
||||
task_t tq_task;
|
||||
taskq_ent_t *tq_freelist;
|
||||
taskq_ent_t tq_task;
|
||||
};
|
||||
|
||||
static task_t *
|
||||
static taskq_ent_t *
|
||||
task_alloc(taskq_t *tq, int tqflags)
|
||||
{
|
||||
task_t *t;
|
||||
taskq_ent_t *t;
|
||||
int rv;
|
||||
|
||||
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
|
||||
tq->tq_freelist = t->task_next;
|
||||
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
||||
tq->tq_freelist = t->tqent_next;
|
||||
} else {
|
||||
if (tq->tq_nalloc >= tq->tq_maxalloc) {
|
||||
if (!(tqflags & KM_SLEEP))
|
||||
@ -87,25 +84,28 @@ again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
|
||||
}
|
||||
mutex_exit(&tq->tq_lock);
|
||||
|
||||
t = kmem_alloc(sizeof (task_t), tqflags);
|
||||
t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
|
||||
|
||||
mutex_enter(&tq->tq_lock);
|
||||
if (t != NULL)
|
||||
if (t != NULL) {
|
||||
/* Make sure we start without any flags */
|
||||
t->tqent_flags = 0;
|
||||
tq->tq_nalloc++;
|
||||
}
|
||||
}
|
||||
return (t);
|
||||
}
|
||||
|
||||
static void
|
||||
task_free(taskq_t *tq, task_t *t)
|
||||
task_free(taskq_t *tq, taskq_ent_t *t)
|
||||
{
|
||||
if (tq->tq_nalloc <= tq->tq_minalloc) {
|
||||
t->task_next = tq->tq_freelist;
|
||||
t->tqent_next = tq->tq_freelist;
|
||||
tq->tq_freelist = t;
|
||||
} else {
|
||||
tq->tq_nalloc--;
|
||||
mutex_exit(&tq->tq_lock);
|
||||
kmem_free(t, sizeof (task_t));
|
||||
kmem_free(t, sizeof (taskq_ent_t));
|
||||
mutex_enter(&tq->tq_lock);
|
||||
}
|
||||
|
||||
@ -116,7 +116,7 @@ task_free(taskq_t *tq, task_t *t)
|
||||
taskqid_t
|
||||
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
|
||||
{
|
||||
task_t *t;
|
||||
taskq_ent_t *t;
|
||||
|
||||
if (taskq_now) {
|
||||
func(arg);
|
||||
@ -130,26 +130,77 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
|
||||
return (0);
|
||||
}
|
||||
if (tqflags & TQ_FRONT) {
|
||||
t->task_next = tq->tq_task.task_next;
|
||||
t->task_prev = &tq->tq_task;
|
||||
t->tqent_next = tq->tq_task.tqent_next;
|
||||
t->tqent_prev = &tq->tq_task;
|
||||
} else {
|
||||
t->task_next = &tq->tq_task;
|
||||
t->task_prev = tq->tq_task.task_prev;
|
||||
t->tqent_next = &tq->tq_task;
|
||||
t->tqent_prev = tq->tq_task.tqent_prev;
|
||||
}
|
||||
t->task_next->task_prev = t;
|
||||
t->task_prev->task_next = t;
|
||||
t->task_func = func;
|
||||
t->task_arg = arg;
|
||||
t->tqent_next->tqent_prev = t;
|
||||
t->tqent_prev->tqent_next = t;
|
||||
t->tqent_func = func;
|
||||
t->tqent_arg = arg;
|
||||
|
||||
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
||||
|
||||
cv_signal(&tq->tq_dispatch_cv);
|
||||
mutex_exit(&tq->tq_lock);
|
||||
return (1);
|
||||
}
|
||||
|
||||
int
|
||||
taskq_empty_ent(taskq_ent_t *t)
|
||||
{
|
||||
return t->tqent_next == NULL;
|
||||
}
|
||||
|
||||
void
|
||||
taskq_init_ent(taskq_ent_t *t)
|
||||
{
|
||||
t->tqent_next = NULL;
|
||||
t->tqent_prev = NULL;
|
||||
t->tqent_func = NULL;
|
||||
t->tqent_arg = NULL;
|
||||
t->tqent_flags = 0;
|
||||
}
|
||||
|
||||
void
|
||||
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
|
||||
taskq_ent_t *t)
|
||||
{
|
||||
ASSERT(func != NULL);
|
||||
ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
|
||||
|
||||
/*
|
||||
* Mark it as a prealloc'd task. This is important
|
||||
* to ensure that we don't free it later.
|
||||
*/
|
||||
t->tqent_flags |= TQENT_FLAG_PREALLOC;
|
||||
/*
|
||||
* Enqueue the task to the underlying queue.
|
||||
*/
|
||||
mutex_enter(&tq->tq_lock);
|
||||
|
||||
if (flags & TQ_FRONT) {
|
||||
t->tqent_next = tq->tq_task.tqent_next;
|
||||
t->tqent_prev = &tq->tq_task;
|
||||
} else {
|
||||
t->tqent_next = &tq->tq_task;
|
||||
t->tqent_prev = tq->tq_task.tqent_prev;
|
||||
}
|
||||
t->tqent_next->tqent_prev = t;
|
||||
t->tqent_prev->tqent_next = t;
|
||||
t->tqent_func = func;
|
||||
t->tqent_arg = arg;
|
||||
cv_signal(&tq->tq_dispatch_cv);
|
||||
mutex_exit(&tq->tq_lock);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_wait(taskq_t *tq)
|
||||
{
|
||||
mutex_enter(&tq->tq_lock);
|
||||
while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
|
||||
while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
|
||||
cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
|
||||
mutex_exit(&tq->tq_lock);
|
||||
}
|
||||
@ -158,27 +209,32 @@ static void
|
||||
taskq_thread(void *arg)
|
||||
{
|
||||
taskq_t *tq = arg;
|
||||
task_t *t;
|
||||
taskq_ent_t *t;
|
||||
boolean_t prealloc;
|
||||
|
||||
mutex_enter(&tq->tq_lock);
|
||||
while (tq->tq_flags & TASKQ_ACTIVE) {
|
||||
if ((t = tq->tq_task.task_next) == &tq->tq_task) {
|
||||
if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
|
||||
if (--tq->tq_active == 0)
|
||||
cv_broadcast(&tq->tq_wait_cv);
|
||||
cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
|
||||
tq->tq_active++;
|
||||
continue;
|
||||
}
|
||||
t->task_prev->task_next = t->task_next;
|
||||
t->task_next->task_prev = t->task_prev;
|
||||
t->tqent_prev->tqent_next = t->tqent_next;
|
||||
t->tqent_next->tqent_prev = t->tqent_prev;
|
||||
t->tqent_next = NULL;
|
||||
t->tqent_prev = NULL;
|
||||
prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
|
||||
mutex_exit(&tq->tq_lock);
|
||||
|
||||
rw_enter(&tq->tq_threadlock, RW_READER);
|
||||
t->task_func(t->task_arg);
|
||||
t->tqent_func(t->tqent_arg);
|
||||
rw_exit(&tq->tq_threadlock);
|
||||
|
||||
mutex_enter(&tq->tq_lock);
|
||||
task_free(tq, t);
|
||||
if (!prealloc)
|
||||
task_free(tq, t);
|
||||
}
|
||||
tq->tq_nthreads--;
|
||||
cv_broadcast(&tq->tq_wait_cv);
|
||||
@ -217,8 +273,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
|
||||
tq->tq_nthreads = nthreads;
|
||||
tq->tq_minalloc = minalloc;
|
||||
tq->tq_maxalloc = maxalloc;
|
||||
tq->tq_task.task_next = &tq->tq_task;
|
||||
tq->tq_task.task_prev = &tq->tq_task;
|
||||
tq->tq_task.tqent_next = &tq->tq_task;
|
||||
tq->tq_task.tqent_prev = &tq->tq_task;
|
||||
tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
|
||||
|
||||
if (flags & TASKQ_PREPOPULATE) {
|
||||
|
@ -22,6 +22,9 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains all the routines used when modifying on-disk SPA state.
|
||||
@ -665,7 +668,7 @@ spa_create_zio_taskqs(spa_t *spa)
|
||||
const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
|
||||
enum zti_modes mode = ztip->zti_mode;
|
||||
uint_t value = ztip->zti_value;
|
||||
uint_t flags = TASKQ_PREPOPULATE;
|
||||
uint_t flags = 0;
|
||||
char name[32];
|
||||
|
||||
if (t == ZIO_TYPE_WRITE)
|
||||
|
@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -570,6 +571,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
zio_add_child(pio, zio);
|
||||
}
|
||||
|
||||
taskq_init_ent(&zio->io_tqent);
|
||||
|
||||
return (zio);
|
||||
}
|
||||
|
||||
@ -1073,7 +1076,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_type_t t = zio->io_type;
|
||||
int flags = TQ_NOSLEEP | (cutinline ? TQ_FRONT : 0);
|
||||
int flags = (cutinline ? TQ_FRONT : 0);
|
||||
|
||||
/*
|
||||
* If we're a config writer or a probe, the normal issue and
|
||||
@ -1098,8 +1101,14 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
||||
|
||||
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
|
||||
|
||||
while (taskq_dispatch(spa->spa_zio_taskq[t][q],
|
||||
(task_func_t *)zio_execute, zio, flags) == 0); /* do nothing */
|
||||
/*
|
||||
* NB: We are assuming that the zio can only be dispatched
|
||||
* to a single taskq at a time. It would be a grievous error
|
||||
* to dispatch the zio to another taskq at the same time.
|
||||
*/
|
||||
ASSERT(taskq_empty_ent(&zio->io_tqent));
|
||||
taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
|
||||
(task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
@ -2947,9 +2956,11 @@ zio_done(zio_t *zio)
|
||||
* Reexecution is potentially a huge amount of work.
|
||||
* Hand it off to the otherwise-unused claim taskq.
|
||||
*/
|
||||
(void) taskq_dispatch(
|
||||
ASSERT(taskq_empty_ent(&zio->io_tqent));
|
||||
(void) taskq_dispatch_ent(
|
||||
zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
|
||||
(task_func_t *)zio_reexecute, zio, TQ_SLEEP);
|
||||
(task_func_t *)zio_reexecute, zio, 0,
|
||||
&zio->io_tqent);
|
||||
}
|
||||
return (ZIO_PIPELINE_STOP);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user