freebsd-skq/sys/geom/geom_disk.h
Alexander Motin 40ea77a036 Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, this allows I/O requests to be executed
directly in the caller context instead of being passed to the GEOM
g_up/g_down threads.  That avoids CPU bottlenecks in the g_up/g_down
threads and saves several context switches per I/O.

The currently defined safety requirements are:
 - caller should not hold any locks and should be reentrant;
 - callee should not depend on GEOM dual-threaded concurrency semantics;
 - on the way down, if request is unmapped while callee doesn't support it,
   the context should be sleepable;
 - kernel thread stack usage should be below 50%.

To keep compatibility with GEOM classes not meeting the above requirements,
new provider and consumer flags were added:
 - G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
 - G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
 - G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
 - G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
A capable GEOM class can set them, allowing direct dispatch in cases where
it is safe.  If any of the requirements is not met, the request is queued
to the g_up or g_down thread, same as before.

The following GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).

To declare direct completion capability, the disk(9) KPI got a new flag
equivalent to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION.  The da(4) and
ada(4) disk drivers now set it, thanks to earlier CAM locking work.

This change more than doubles peak block storage performance on systems
with many CPUs; together with earlier CAM locking changes it reaches
more than 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).

Sponsored by:	iXsystems, Inc.
MFC after:	2 months
2013-10-22 08:22:19 +00:00

129 lines
4.1 KiB
C

/*-
* Copyright (c) 2003 Poul-Henning Kamp
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Poul-Henning Kamp
* and NAI Labs, the Security Research Division of Network Associates, Inc.
* under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
* DARPA CHATS research program.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The names of the authors may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _GEOM_GEOM_DISK_H_
#define _GEOM_GEOM_DISK_H_
#ifdef _KERNEL
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/disk.h>
/* Name string of the DISK GEOM class. */
#define G_DISK_CLASS_NAME "DISK"
/* Forward declaration so the method types below can take struct disk pointers. */
struct disk;
/*
 * Function types for the disk methods.  struct disk stores one pointer of
 * each type (d_open, d_close, d_strategy, d_getattr, d_gone, d_ioctl).
 *
 * disk_open_t, disk_close_t, disk_gone_t and disk_ioctl_t receive the
 * struct disk itself; disk_strategy_t and disk_getattr_t receive only a
 * struct bio.
 *
 * NOTE(review): the int return values are presumably errno-style codes
 * (0 on success) -- confirm against geom_disk.c and disk(9).
 */
typedef int disk_open_t(struct disk *);
typedef int disk_close_t(struct disk *);
typedef void disk_strategy_t(struct bio *bp);
typedef int disk_getattr_t(struct bio *bp);
typedef void disk_gone_t(struct disk *);
typedef int disk_ioctl_t(struct disk *, u_long cmd, void *data,
int fflag, struct thread *td);
/* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */
/* Opaque forward declarations; struct disk only holds pointers to these. */
struct g_geom;
struct devstat;
/*
 * Descriptor shared between a disk driver and geom_disk.  The fields are
 * grouped by owner: geom_disk-private state, fields shared by both sides,
 * the method pointers, informational fields reported by the driver, and a
 * driver-private pointer.
 *
 * NOTE(review): the layout is presumably tied to DISK_VERSION, so fields
 * should not be reordered or resized without a version bump -- confirm.
 */
struct disk {
/* Fields which are private to geom_disk */
struct g_geom *d_geom;
struct devstat *d_devstat;
int d_destroyed;
/* Shared fields */
/* NOTE(review): d_flags presumably holds the DISKFLAG_* bits -- confirm. */
u_int d_flags;
const char *d_name;
u_int d_unit;
struct bio_queue_head *d_queue;
struct mtx *d_lock;
/* Disk methods */
disk_open_t *d_open;
disk_close_t *d_close;
disk_strategy_t *d_strategy;
disk_ioctl_t *d_ioctl;
/* dumper_t is not declared in this header; it must come from elsewhere. */
dumper_t *d_dump;
disk_getattr_t *d_getattr;
disk_gone_t *d_gone;
/* Info fields from driver to geom_disk.c. Valid when open */
u_int d_sectorsize;
off_t d_mediasize;
u_int d_fwsectors;
u_int d_fwheads;
u_int d_maxsize;
off_t d_delmaxsize;
u_int d_stripeoffset;
u_int d_stripesize;
/*
 * Fixed-size character buffers.  DISK_IDENT_SIZE is not defined in this
 * header (presumably provided by <sys/disk.h> -- confirm).
 */
char d_ident[DISK_IDENT_SIZE];
char d_descr[DISK_IDENT_SIZE];
uint16_t d_hba_vendor;
uint16_t d_hba_device;
uint16_t d_hba_subvendor;
uint16_t d_hba_subdevice;
/* Fields private to the driver */
void *d_drv1;
};
/*
 * Disk flag bits.  Each flag is a distinct single bit, so they can be
 * OR-ed together.
 *
 * NOTE(review): presumably stored in disk.d_flags; the exact meaning of
 * each flag other than DISKFLAG_DIRECT_COMPLETION is not visible in this
 * header -- see geom_disk.c / disk(9).
 */
#define DISKFLAG_NEEDSGIANT 0x1
#define DISKFLAG_OPEN 0x2
#define DISKFLAG_CANDELETE 0x4
#define DISKFLAG_CANFLUSHCACHE 0x8
#define DISKFLAG_UNMAPPED_BIO 0x10
/*
 * Declares direct completion capability; the disk(9) equivalent of the
 * GEOM provider flag G_PF_DIRECT_SEND.
 */
#define DISKFLAG_DIRECT_COMPLETION 0x20
/*
 * Functions exported for disk drivers.  Only the prototypes are visible
 * here; the implementations live elsewhere (presumably geom_disk.c).
 *
 * NOTE(review): the 'version' argument of disk_create() is presumably
 * expected to be DISK_VERSION, and the 'flag' arguments are presumably
 * malloc(9)-style wait flags -- confirm against the implementation.
 */
struct disk *disk_alloc(void);
void disk_create(struct disk *disk, int version);
void disk_destroy(struct disk *disk);
void disk_gone(struct disk *disk);
void disk_attr_changed(struct disk *dp, const char *attr, int flag);
void disk_media_changed(struct disk *dp, int flag);
void disk_media_gone(struct disk *dp, int flag);
int disk_resize(struct disk *dp, int flag);
/*
 * Interface version identifiers.  The values are consecutive, and
 * DISK_VERSION aliases the newest one defined here (DISK_VERSION_03).
 *
 * NOTE(review): presumably passed as the 'version' argument of
 * disk_create() so geom_disk can detect drivers built against a
 * different struct disk layout -- confirm.
 */
#define DISK_VERSION_00 0x58561059
#define DISK_VERSION_01 0x5856105a
#define DISK_VERSION_02 0x5856105b
#define DISK_VERSION_03 0x5856105c
#define DISK_VERSION DISK_VERSION_03
#endif /* _KERNEL */
#endif /* _GEOM_GEOM_DISK_H_ */