2005-01-06 01:43:34 +00:00
|
|
|
/*-
|
1999-09-21 11:00:49 +00:00
|
|
|
* ----------------------------------------------------------------------------
|
|
|
|
* "THE BEER-WARE LICENSE" (Revision 42):
|
|
|
|
* <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
|
|
|
|
* can do whatever you want with this stuff. If we meet some day, and you think
|
|
|
|
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
|
|
|
|
* ----------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2005-01-06 01:43:34 +00:00
|
|
|
/*-
|
2001-01-02 09:42:47 +00:00
|
|
|
* The following functions are based in the vn(4) driver: mdstart_swap(),
|
|
|
|
* mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
|
|
|
|
* and as such under the following copyright:
|
|
|
|
*
|
|
|
|
* Copyright (c) 1988 University of Utah.
|
|
|
|
* Copyright (c) 1990, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
2013-03-19 14:53:23 +00:00
|
|
|
* Copyright (c) 2013 The FreeBSD Foundation
|
|
|
|
* All rights reserved.
|
2001-01-02 09:42:47 +00:00
|
|
|
*
|
2004-04-09 05:12:47 +00:00
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* the Systems Programming Group of the University of Utah Computer
|
|
|
|
* Science Department.
|
|
|
|
*
|
2013-03-19 14:53:23 +00:00
|
|
|
* Portions of this software were developed by Konstantin Belousov
|
|
|
|
* under sponsorship from the FreeBSD Foundation.
|
|
|
|
*
|
2001-01-02 09:42:47 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* from: Utah Hdr: vn.c 1.13 94/04/02
|
|
|
|
*
|
|
|
|
* from: @(#)vn.c 8.6 (Berkeley) 4/1/94
|
2002-01-21 20:57:03 +00:00
|
|
|
* From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
|
2001-01-02 09:42:47 +00:00
|
|
|
*/
|
|
|
|
|
2003-01-12 21:16:49 +00:00
|
|
|
#include "opt_geom.h"
|
2000-08-20 21:34:39 +00:00
|
|
|
#include "opt_md.h"
|
1999-11-26 20:08:44 +00:00
|
|
|
|
1999-09-21 11:00:49 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
2000-05-05 09:59:14 +00:00
|
|
|
#include <sys/bio.h>
|
2013-03-19 14:53:23 +00:00
|
|
|
#include <sys/buf.h>
|
1999-09-21 11:00:49 +00:00
|
|
|
#include <sys/conf.h>
|
2007-11-07 22:47:41 +00:00
|
|
|
#include <sys/devicestat.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/fcntl.h>
|
1999-09-22 21:23:21 +00:00
|
|
|
#include <sys/kernel.h>
|
2002-06-03 22:09:04 +00:00
|
|
|
#include <sys/kthread.h>
|
2008-05-29 12:50:46 +00:00
|
|
|
#include <sys/limits.h>
|
1999-11-20 22:43:27 +00:00
|
|
|
#include <sys/linker.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/malloc.h>
|
2000-12-31 13:03:42 +00:00
|
|
|
#include <sys/mdioctl.h>
|
2006-03-28 21:25:11 +00:00
|
|
|
#include <sys/mount.h>
|
2001-05-21 18:52:02 +00:00
|
|
|
#include <sys/mutex.h>
|
2005-09-19 06:55:27 +00:00
|
|
|
#include <sys/sx.h>
|
2000-12-31 13:03:42 +00:00
|
|
|
#include <sys/namei.h>
|
|
|
|
#include <sys/proc.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/queue.h>
|
2013-03-09 02:32:23 +00:00
|
|
|
#include <sys/rwlock.h>
|
2011-07-11 05:19:28 +00:00
|
|
|
#include <sys/sbuf.h>
|
2004-12-30 20:29:58 +00:00
|
|
|
#include <sys/sched.h>
|
2004-03-18 18:23:37 +00:00
|
|
|
#include <sys/sf_buf.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/vnode.h>
|
|
|
|
|
2003-01-12 21:16:49 +00:00
|
|
|
#include <geom/geom.h>
|
2015-08-13 13:20:29 +00:00
|
|
|
#include <geom/geom_int.h>
|
2003-01-12 21:16:49 +00:00
|
|
|
|
2000-12-31 13:03:42 +00:00
|
|
|
#include <vm/vm.h>
|
2012-08-05 14:11:42 +00:00
|
|
|
#include <vm/vm_param.h>
|
2000-12-31 13:03:42 +00:00
|
|
|
#include <vm/vm_object.h>
|
|
|
|
#include <vm/vm_page.h>
|
|
|
|
#include <vm/vm_pager.h>
|
|
|
|
#include <vm/swap_pager.h>
|
2002-05-26 06:48:55 +00:00
|
|
|
#include <vm/uma.h>
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2001-02-24 16:26:41 +00:00
|
|
|
#define MD_MODVER 1
|
|
|
|
|
2006-03-28 21:25:11 +00:00
|
|
|
#define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */
|
|
|
|
#define MD_EXITING 0x20000 /* Worker thread is exiting. */
|
2002-06-03 22:09:04 +00:00
|
|
|
|
2000-07-17 13:13:04 +00:00
|
|
|
#ifndef MD_NSECT
|
|
|
|
#define MD_NSECT (10000 * 2)
|
1999-09-21 12:05:38 +00:00
|
|
|
#endif
|
|
|
|
|
2005-10-31 15:41:29 +00:00
|
|
|
static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk");
|
|
|
|
static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors");
|
1999-09-21 11:00:49 +00:00
|
|
|
|
1999-11-26 20:08:44 +00:00
|
|
|
static int md_debug;
|
2011-12-13 00:38:50 +00:00
|
|
|
SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0,
|
|
|
|
"Enable md(4) debug messages");
|
2010-12-29 11:39:15 +00:00
|
|
|
static int md_malloc_wait;
|
2011-12-13 00:38:50 +00:00
|
|
|
SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0,
|
|
|
|
"Allow malloc to wait for memory allocations");
|
1999-09-21 11:00:49 +00:00
|
|
|
|
2012-11-03 21:20:55 +00:00
|
|
|
#if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE)
|
|
|
|
#define MD_ROOT_FSTYPE "ufs"
|
|
|
|
#endif
|
|
|
|
|
1999-11-26 20:08:44 +00:00
|
|
|
#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
|
2006-01-31 13:35:30 +00:00
|
|
|
/*
|
|
|
|
* Preloaded image gets put here.
|
|
|
|
* Applications that patch the object with the image can determine
|
|
|
|
* the size looking at the start and end markers (strings),
|
|
|
|
* so we want them contiguous.
|
|
|
|
*/
|
|
|
|
static struct {
	/* Patched-in root filesystem image lives here. */
	u_char start[MD_ROOT_SIZE*1024];
	/* End marker; contiguous with start[] so tools can find the bounds. */
	u_char end[128];
} mfs_root = {
	.start = "MFS Filesystem goes here",
	.end = "MFS Filesystem had better STOP here",
};
|
1999-11-26 20:08:44 +00:00
|
|
|
#endif
|
|
|
|
|
2004-08-08 06:47:43 +00:00
|
|
|
static g_init_t g_md_init;
|
|
|
|
static g_fini_t g_md_fini;
|
|
|
|
static g_start_t g_md_start;
|
|
|
|
static g_access_t g_md_access;
|
2010-07-26 10:37:14 +00:00
|
|
|
static void g_md_dumpconf(struct sbuf *sb, const char *indent,
|
|
|
|
struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp);
|
2003-11-18 18:19:26 +00:00
|
|
|
|
2004-06-16 09:47:26 +00:00
|
|
|
static struct cdev *status_dev = 0;
|
2005-09-19 06:55:27 +00:00
|
|
|
static struct sx md_sx;
|
2010-07-22 10:24:28 +00:00
|
|
|
static struct unrhdr *md_uh;
|
2001-02-24 16:26:41 +00:00
|
|
|
|
2003-01-12 17:39:29 +00:00
|
|
|
static d_ioctl_t mdctlioctl;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
/*
 * Character device switch for the /dev/mdctl control node: only ioctl
 * is implemented (device create/destroy/query requests).
 */
static struct cdevsw mdctl_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = mdctlioctl,
	.d_name = MD_NAME,
};
|
|
|
|
|
2004-08-08 06:47:43 +00:00
|
|
|
/*
 * GEOM class glue: registers the md(4) driver's lifecycle and I/O
 * entry points with the GEOM framework.
 */
struct g_class g_md_class = {
	.name = "MD",
	.version = G_VERSION,
	.init = g_md_init,
	.fini = g_md_fini,
	.start = g_md_start,
	.access = g_md_access,
	.dumpconf = g_md_dumpconf,
};
|
|
|
|
|
|
|
|
DECLARE_GEOM_CLASS(g_md_class, g_md);
|
|
|
|
|
2000-07-04 10:06:34 +00:00
|
|
|
|
2009-12-28 22:56:30 +00:00
|
|
|
static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2002-05-25 20:44:20 +00:00
|
|
|
#define NINDIR (PAGE_SIZE / sizeof(uintptr_t))
|
|
|
|
#define NMASK (NINDIR-1)
|
|
|
|
static int nshift;
|
|
|
|
|
2013-03-19 14:53:23 +00:00
|
|
|
static int md_vnode_pbuf_freecnt;
|
|
|
|
|
2002-05-25 20:44:20 +00:00
|
|
|
/*
 * One node of the radix tree used by malloc-backed devices.  Interior
 * nodes (shift != 0) store pointers to child "struct indir" nodes in
 * array[]; leaf nodes (shift == 0) store per-sector values.
 */
struct indir {
	uintptr_t *array;	/* NINDIR slots: children or sector values */
	u_int total;		/* number of slots allocated (NINDIR) */
	u_int used;		/* number of non-zero slots; 0 => prunable */
	u_int shift;		/* bit shift selecting this node's index */
};
|
|
|
|
|
1999-09-21 11:00:49 +00:00
|
|
|
/*
 * Per-device softc for an md(4) memory disk.  Backing-store specific
 * members are grouped at the end by device type (MD_MALLOC, MD_PRELOAD,
 * MD_VNODE, MD_SWAP).
 */
struct md_s {
	int unit;			/* unit number (mdN) */
	LIST_ENTRY(md_s) list;		/* linkage on md_softc_list */
	struct bio_queue_head bio_queue; /* pending I/O requests */
	struct mtx queue_mtx;		/* protects bio_queue */
	struct mtx stat_mtx;		/* protects devstat updates */
	struct cdev *dev;
	enum md_types type;		/* backing store variant */
	off_t mediasize;		/* device size in bytes */
	unsigned sectorsize;		/* sector size in bytes */
	unsigned opencount;
	unsigned fwheads;		/* fake geometry for compatibility */
	unsigned fwsectors;
	unsigned flags;			/* MD_* flags (mdioctl.h) */
	char name[20];
	struct proc *procp;		/* worker thread process */
	struct g_geom *gp;
	struct g_provider *pp;
	int (*start)(struct md_s *sc, struct bio *bp); /* per-type I/O handler */
	struct devstat *devstat;

	/* MD_MALLOC related fields */
	struct indir *indir;		/* top of the sector radix tree */
	uma_zone_t uma;			/* zone for sector-sized allocations */

	/* MD_PRELOAD related fields */
	u_char *pl_ptr;			/* preloaded image base */
	size_t pl_len;			/* preloaded image length */

	/* MD_VNODE related fields */
	struct vnode *vnode;		/* backing file vnode */
	char file[PATH_MAX];		/* backing file path */
	struct ucred *cred;		/* credentials for file I/O */

	/* MD_SWAP related fields */
	vm_object_t object;		/* swap-backed VM object */
};
|
1999-09-21 11:00:49 +00:00
|
|
|
|
2002-05-25 20:44:20 +00:00
|
|
|
static struct indir *
|
2003-08-07 15:04:27 +00:00
|
|
|
new_indir(u_int shift)
|
2002-05-25 20:44:20 +00:00
|
|
|
{
|
|
|
|
struct indir *ip;
|
|
|
|
|
2010-12-29 11:39:15 +00:00
|
|
|
ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT)
|
|
|
|
| M_ZERO);
|
2002-05-25 20:44:20 +00:00
|
|
|
if (ip == NULL)
|
2002-06-10 19:25:21 +00:00
|
|
|
return (NULL);
|
2002-05-25 20:44:20 +00:00
|
|
|
ip->array = malloc(sizeof(uintptr_t) * NINDIR,
|
2010-12-29 11:39:15 +00:00
|
|
|
M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
|
2002-05-25 20:44:20 +00:00
|
|
|
if (ip->array == NULL) {
|
|
|
|
free(ip, M_MD);
|
2002-06-10 19:25:21 +00:00
|
|
|
return (NULL);
|
2002-05-25 20:44:20 +00:00
|
|
|
}
|
|
|
|
ip->total = NINDIR;
|
|
|
|
ip->shift = shift;
|
2002-06-10 19:25:21 +00:00
|
|
|
return (ip);
|
2002-05-25 20:44:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
del_indir(struct indir *ip)
|
|
|
|
{
|
|
|
|
|
2002-05-26 06:48:55 +00:00
|
|
|
free(ip->array, M_MDSECT);
|
2002-05-25 20:44:20 +00:00
|
|
|
free(ip, M_MD);
|
|
|
|
}
|
|
|
|
|
2002-05-26 06:48:55 +00:00
|
|
|
static void
|
|
|
|
destroy_indir(struct md_s *sc, struct indir *ip)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < NINDIR; i++) {
|
|
|
|
if (!ip->array[i])
|
|
|
|
continue;
|
|
|
|
if (ip->shift)
|
|
|
|
destroy_indir(sc, (struct indir*)(ip->array[i]));
|
|
|
|
else if (ip->array[i] > 255)
|
|
|
|
uma_zfree(sc->uma, (void *)(ip->array[i]));
|
|
|
|
}
|
|
|
|
del_indir(ip);
|
|
|
|
}
|
|
|
|
|
2002-05-25 20:44:20 +00:00
|
|
|
/*
|
2005-12-28 15:18:18 +00:00
|
|
|
* This function does the math and allocates the top level "indir" structure
|
2002-05-25 20:44:20 +00:00
|
|
|
* for a device of "size" sectors.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static struct indir *
|
|
|
|
dimension(off_t size)
|
|
|
|
{
|
|
|
|
off_t rcnt;
|
|
|
|
struct indir *ip;
|
2010-07-06 18:22:57 +00:00
|
|
|
int layer;
|
2002-05-25 20:44:20 +00:00
|
|
|
|
|
|
|
rcnt = size;
|
|
|
|
layer = 0;
|
|
|
|
while (rcnt > NINDIR) {
|
|
|
|
rcnt /= NINDIR;
|
|
|
|
layer++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: the top layer is probably not fully populated, so we allocate
|
2003-03-27 10:14:36 +00:00
|
|
|
* too much space for ip->array in here.
|
2002-05-25 20:44:20 +00:00
|
|
|
*/
|
2003-03-27 10:14:36 +00:00
|
|
|
ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO);
|
|
|
|
ip->array = malloc(sizeof(uintptr_t) * NINDIR,
|
|
|
|
M_MDSECT, M_WAITOK | M_ZERO);
|
|
|
|
ip->total = NINDIR;
|
|
|
|
ip->shift = layer * nshift;
|
2002-05-25 20:44:20 +00:00
|
|
|
return (ip);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read a given sector
|
|
|
|
*/
|
|
|
|
|
|
|
|
static uintptr_t
|
|
|
|
s_read(struct indir *ip, off_t offset)
|
|
|
|
{
|
|
|
|
struct indir *cip;
|
|
|
|
int idx;
|
|
|
|
uintptr_t up;
|
|
|
|
|
|
|
|
if (md_debug > 1)
|
2002-06-24 12:07:02 +00:00
|
|
|
printf("s_read(%jd)\n", (intmax_t)offset);
|
2002-05-25 20:44:20 +00:00
|
|
|
up = 0;
|
|
|
|
for (cip = ip; cip != NULL;) {
|
|
|
|
if (cip->shift) {
|
|
|
|
idx = (offset >> cip->shift) & NMASK;
|
|
|
|
up = cip->array[idx];
|
|
|
|
cip = (struct indir *)up;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
idx = offset & NMASK;
|
2002-06-10 19:25:21 +00:00
|
|
|
return (cip->array[idx]);
|
2002-05-25 20:44:20 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write a given sector, prune the tree if the value is 0
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Write a given sector, prune the tree if the value is 0.
 *
 * Descends the radix tree to the leaf for "offset", allocating interior
 * nodes on demand, stores "ptr", and then — if the write emptied the
 * leaf — walks back up the recorded path freeing nodes whose "used"
 * count dropped to zero.  Returns 0 or ENOSPC.
 */
static int
s_write(struct indir *ip, off_t offset, uintptr_t ptr)
{
	struct indir *cip, *lip[10];	/* lip[] records the descent path */
	int idx, li;
	uintptr_t up;

	if (md_debug > 1)
		printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
	up = 0;
	li = 0;
	cip = ip;
	for (;;) {
		lip[li++] = cip;
		if (cip->shift) {
			/* Interior node: pick the child for this offset. */
			idx = (offset >> cip->shift) & NMASK;
			up = cip->array[idx];
			if (up != 0) {
				cip = (struct indir *)up;
				continue;
			}
			/* Allocate branch */
			cip->array[idx] =
			    (uintptr_t)new_indir(cip->shift - nshift);
			if (cip->array[idx] == 0)
				return (ENOSPC);
			cip->used++;
			up = cip->array[idx];
			cip = (struct indir *)up;
			continue;
		}
		/* leafnode */
		idx = offset & NMASK;
		up = cip->array[idx];
		/* Maintain "used": count non-zero slots in this node. */
		if (up != 0)
			cip->used--;
		cip->array[idx] = ptr;
		if (ptr != 0)
			cip->used++;
		break;
	}
	/* Nothing to prune if the leaf is still used or it is the root. */
	if (cip->used != 0 || li == 1)
		return (0);
	li--;
	/* Free now-empty nodes bottom-up, stopping at the root. */
	while (cip->used == 0 && cip != ip) {
		li--;
		idx = (offset >> lip[li]->shift) & NMASK;
		up = lip[li]->array[idx];
		KASSERT(up == (uintptr_t)cip, ("md screwed up"));
		del_indir(cip);
		lip[li]->array[idx] = 0;
		lip[li]->used--;
		cip = lip[li];
	}
	return (0);
}
|
|
|
|
|
2003-01-12 21:16:49 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
g_md_access(struct g_provider *pp, int r, int w, int e)
|
|
|
|
{
|
|
|
|
struct md_s *sc;
|
|
|
|
|
|
|
|
sc = pp->geom->softc;
|
2008-12-16 20:59:27 +00:00
|
|
|
if (sc == NULL) {
|
|
|
|
if (r <= 0 && w <= 0 && e <= 0)
|
|
|
|
return (0);
|
2003-05-02 06:18:58 +00:00
|
|
|
return (ENXIO);
|
2008-12-16 20:59:27 +00:00
|
|
|
}
|
2003-01-12 21:16:49 +00:00
|
|
|
r += pp->acr;
|
|
|
|
w += pp->acw;
|
|
|
|
e += pp->ace;
|
2005-08-17 01:24:55 +00:00
|
|
|
if ((sc->flags & MD_READONLY) != 0 && w > 0)
|
|
|
|
return (EROFS);
|
2003-01-12 21:16:49 +00:00
|
|
|
if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
|
|
|
|
sc->opencount = 1;
|
|
|
|
} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
|
|
|
|
sc->opencount = 0;
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * GEOM start method: account the request in devstat (reads/writes
 * only), queue it for the per-device worker thread and wake it up.
 * May be called from direct-dispatch context, hence the fine-grained
 * locking around both the stats and the queue.
 */
static void
g_md_start(struct bio *bp)
{
	struct md_s *sc;

	sc = bp->bio_to->geom->softc;
	if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) {
		mtx_lock(&sc->stat_mtx);
		devstat_start_transaction_bio(sc->devstat, bp);
		mtx_unlock(&sc->stat_mtx);
	}
	mtx_lock(&sc->queue_mtx);
	bioq_disksort(&sc->bio_queue, bp);
	mtx_unlock(&sc->queue_mtx);
	/* Worker thread sleeps on the softc address. */
	wakeup(sc);
}
|
|
|
|
|
2013-03-19 14:53:23 +00:00
|
|
|
#define MD_MALLOC_MOVE_ZERO 1
|
|
|
|
#define MD_MALLOC_MOVE_FILL 2
|
|
|
|
#define MD_MALLOC_MOVE_READ 3
|
|
|
|
#define MD_MALLOC_MOVE_WRITE 4
|
|
|
|
#define MD_MALLOC_MOVE_CMP 5
|
|
|
|
|
|
|
|
/*
 * Copy/fill/compare "sectorsize" bytes between an unmapped bio's page
 * array and a kernel buffer, mapping each page transiently through an
 * sf_buf.  *mp/*ma_offs track the current page and intra-page offset
 * and are advanced as the transfer proceeds.  "op" selects the action
 * (MD_MALLOC_MOVE_*); "ptr"/"fill" are the kernel-side buffer or fill
 * byte, depending on the op.  Returns 0, ENOMEM (sf_buf shortage) or
 * EDOOFUS (CMP found a non-uniform sector).
 */
static int
md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
    void *ptr, u_char fill, int op)
{
	struct sf_buf *sf;
	vm_page_t m, *mp1;
	char *p, first;
	off_t *uc;
	unsigned n;
	int error, i, ma_offs1, sz, first_read;

	m = NULL;
	error = 0;
	sf = NULL;
	/* if (op == MD_MALLOC_MOVE_CMP) { gcc */
	first = 0;
	first_read = 0;
	uc = ptr;
	/* Remember the starting position so CMP can roll back on failure. */
	mp1 = *mp;
	ma_offs1 = *ma_offs;
	/* } */
	/* Pin to this CPU for the lifetime of the CPU-private sf_buf. */
	sched_pin();
	for (n = sectorsize; n != 0; n -= sz) {
		/* Transfer at most to the end of the current page. */
		sz = imin(PAGE_SIZE - *ma_offs, n);
		if (m != **mp) {
			/* Crossed into a new page: remap the sf_buf. */
			if (sf != NULL)
				sf_buf_free(sf);
			m = **mp;
			sf = sf_buf_alloc(m, SFB_CPUPRIVATE |
			    (md_malloc_wait ? 0 : SFB_NOWAIT));
			if (sf == NULL) {
				error = ENOMEM;
				break;
			}
		}
		p = (char *)sf_buf_kva(sf) + *ma_offs;
		switch (op) {
		case MD_MALLOC_MOVE_ZERO:
			bzero(p, sz);
			break;
		case MD_MALLOC_MOVE_FILL:
			memset(p, fill, sz);
			break;
		case MD_MALLOC_MOVE_READ:
			bcopy(ptr, p, sz);
			cpu_flush_dcache(p, sz);
			break;
		case MD_MALLOC_MOVE_WRITE:
			bcopy(p, ptr, sz);
			break;
		case MD_MALLOC_MOVE_CMP:
			/* Check the sector consists of one repeated byte. */
			for (i = 0; i < sz; i++, p++) {
				if (!first_read) {
					*uc = (u_char)*p;
					first = *p;
					first_read = 1;
				} else if (*p != first) {
					error = EDOOFUS;
					break;
				}
			}
			break;
		default:
			KASSERT(0, ("md_malloc_move unknown op %d\n", op));
			break;
		}
		if (error != 0)
			break;
		/* Advance the page/offset cursor past this chunk. */
		*ma_offs += sz;
		*ma_offs %= PAGE_SIZE;
		if (*ma_offs == 0)
			(*mp)++;
		ptr = (char *)ptr + sz;
	}

	if (sf != NULL)
		sf_buf_free(sf);
	sched_unpin();
	if (op == MD_MALLOC_MOVE_CMP && error != 0) {
		/* Failed compare: restore the cursor for the retry path. */
		*mp = mp1;
		*ma_offs = ma_offs1;
	}
	return (error);
}
|
|
|
|
|
2002-01-21 20:50:06 +00:00
|
|
|
/*
 * I/O handler for malloc-backed devices.  Sectors live in the radix
 * tree: a leaf value of 0 means "never written", values <= 255 encode
 * a sector filled entirely with that byte, larger values are pointers
 * to uma-allocated sector buffers.  Handles both mapped bios (bio_data)
 * and unmapped ones (bio_ma, via md_malloc_move()).
 */
static int
mdstart_malloc(struct md_s *sc, struct bio *bp)
{
	u_char *dst;
	vm_page_t *m;
	int i, error, error1, ma_offs, notmapped;
	off_t secno, nsec, uc;
	uintptr_t sp, osp;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	default:
		return (EOPNOTSUPP);
	}

	notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0;
	if (notmapped) {
		m = bp->bio_ma;
		ma_offs = bp->bio_ma_offset;
		dst = NULL;
	} else {
		dst = bp->bio_data;
	}

	nsec = bp->bio_length / sc->sectorsize;
	secno = bp->bio_offset / sc->sectorsize;
	error = 0;
	while (nsec--) {
		osp = s_read(sc->indir, secno);
		if (bp->bio_cmd == BIO_DELETE) {
			/* Drop the sector; the tree prunes itself. */
			if (osp != 0)
				error = s_write(sc->indir, secno, 0);
		} else if (bp->bio_cmd == BIO_READ) {
			if (osp == 0) {
				/* Never-written sector reads as zeroes. */
				if (notmapped) {
					error = md_malloc_move(&m, &ma_offs,
					    sc->sectorsize, NULL, 0,
					    MD_MALLOC_MOVE_ZERO);
				} else
					bzero(dst, sc->sectorsize);
			} else if (osp <= 255) {
				/* Compressed sector: one repeated byte. */
				if (notmapped) {
					error = md_malloc_move(&m, &ma_offs,
					    sc->sectorsize, NULL, osp,
					    MD_MALLOC_MOVE_FILL);
				} else
					memset(dst, osp, sc->sectorsize);
			} else {
				/* Regular sector: osp is a buffer pointer. */
				if (notmapped) {
					error = md_malloc_move(&m, &ma_offs,
					    sc->sectorsize, (void *)osp, 0,
					    MD_MALLOC_MOVE_READ);
				} else {
					bcopy((void *)osp, dst, sc->sectorsize);
					cpu_flush_dcache(dst, sc->sectorsize);
				}
			}
			/* Reads keep the buffer: don't let cleanup free it. */
			osp = 0;
		} else if (bp->bio_cmd == BIO_WRITE) {
			if (sc->flags & MD_COMPRESS) {
				/*
				 * Scan for a uniform sector; on exit
				 * i == sectorsize iff all bytes equal uc.
				 */
				if (notmapped) {
					error1 = md_malloc_move(&m, &ma_offs,
					    sc->sectorsize, &uc, 0,
					    MD_MALLOC_MOVE_CMP);
					i = error1 == 0 ? sc->sectorsize : 0;
				} else {
					uc = dst[0];
					for (i = 1; i < sc->sectorsize; i++) {
						if (dst[i] != uc)
							break;
					}
				}
			} else {
				i = 0;
				uc = 0;
			}
			if (i == sc->sectorsize) {
				/* Uniform sector: store just the byte. */
				if (osp != uc)
					error = s_write(sc->indir, secno, uc);
			} else {
				if (osp <= 255) {
					/* Need a real buffer for this data. */
					sp = (uintptr_t)uma_zalloc(sc->uma,
					    md_malloc_wait ? M_WAITOK :
					    M_NOWAIT);
					if (sp == 0) {
						error = ENOSPC;
						break;
					}
					if (notmapped) {
						error = md_malloc_move(&m,
						    &ma_offs, sc->sectorsize,
						    (void *)sp, 0,
						    MD_MALLOC_MOVE_WRITE);
					} else {
						bcopy(dst, (void *)sp,
						    sc->sectorsize);
					}
					error = s_write(sc->indir, secno, sp);
				} else {
					/* Overwrite the existing buffer. */
					if (notmapped) {
						error = md_malloc_move(&m,
						    &ma_offs, sc->sectorsize,
						    (void *)osp, 0,
						    MD_MALLOC_MOVE_WRITE);
					} else {
						bcopy(dst, (void *)osp,
						    sc->sectorsize);
					}
					/* Buffer reused in place: keep it. */
					osp = 0;
				}
			}
		} else {
			error = EOPNOTSUPP;
		}
		/* Free a replaced/deleted sector buffer, if any. */
		if (osp > 255)
			uma_zfree(sc->uma, (void*)osp);
		if (error != 0)
			break;
		secno++;
		if (!notmapped)
			dst += sc->sectorsize;
	}
	bp->bio_resid = 0;
	return (error);
}
|
|
|
|
|
2002-01-21 20:50:06 +00:00
|
|
|
static int
|
|
|
|
mdstart_preload(struct md_s *sc, struct bio *bp)
|
1999-11-26 20:08:44 +00:00
|
|
|
{
|
|
|
|
|
2004-09-16 19:42:17 +00:00
|
|
|
switch (bp->bio_cmd) {
|
|
|
|
case BIO_READ:
|
|
|
|
bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data,
|
|
|
|
bp->bio_length);
|
2009-05-18 18:37:18 +00:00
|
|
|
cpu_flush_dcache(bp->bio_data, bp->bio_length);
|
2004-09-16 19:42:17 +00:00
|
|
|
break;
|
|
|
|
case BIO_WRITE:
|
|
|
|
bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset,
|
|
|
|
bp->bio_length);
|
|
|
|
break;
|
2002-01-21 20:57:03 +00:00
|
|
|
}
|
|
|
|
bp->bio_resid = 0;
|
2002-01-21 20:50:06 +00:00
|
|
|
return (0);
|
1999-11-26 20:08:44 +00:00
|
|
|
}
|
|
|
|
|
2002-01-21 20:50:06 +00:00
|
|
|
static int
|
|
|
|
mdstart_vnode(struct md_s *sc, struct bio *bp)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
2012-10-22 17:50:54 +00:00
|
|
|
int error;
|
2000-12-31 13:03:42 +00:00
|
|
|
struct uio auio;
|
|
|
|
struct iovec aiov;
|
|
|
|
struct mount *mp;
|
2006-11-01 18:56:18 +00:00
|
|
|
struct vnode *vp;
|
2013-03-19 14:53:23 +00:00
|
|
|
struct buf *pb;
|
2006-11-01 18:56:18 +00:00
|
|
|
struct thread *td;
|
2011-04-29 21:18:41 +00:00
|
|
|
off_t end, zerosize;
|
2006-11-01 18:56:18 +00:00
|
|
|
|
|
|
|
switch (bp->bio_cmd) {
|
|
|
|
case BIO_READ:
|
|
|
|
case BIO_WRITE:
|
2011-04-29 21:18:41 +00:00
|
|
|
case BIO_DELETE:
|
2006-11-01 18:56:18 +00:00
|
|
|
case BIO_FLUSH:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
|
|
|
|
|
|
|
td = curthread;
|
|
|
|
vp = sc->vnode;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* VNODE I/O
|
|
|
|
*
|
2002-01-21 20:57:03 +00:00
|
|
|
* If an error occurs, we set BIO_ERROR but we do not set
|
|
|
|
* B_INVAL because (for a write anyway), the buffer is
|
2000-12-31 13:03:42 +00:00
|
|
|
* still valid.
|
|
|
|
*/
|
|
|
|
|
2006-11-01 18:56:18 +00:00
|
|
|
if (bp->bio_cmd == BIO_FLUSH) {
|
|
|
|
(void) vn_start_write(vp, &mp, V_WAIT);
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2006-11-01 18:56:18 +00:00
|
|
|
error = VOP_FSYNC(vp, MNT_WAIT, td);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2006-11-01 18:56:18 +00:00
|
|
|
vn_finished_write(mp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2002-01-21 20:57:03 +00:00
|
|
|
bzero(&auio, sizeof(auio));
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2011-04-29 21:18:41 +00:00
|
|
|
/*
|
|
|
|
* Special case for BIO_DELETE. On the surface, this is very
|
|
|
|
* similar to BIO_WRITE, except that we write from our own
|
|
|
|
* fixed-length buffer, so we have to loop. The net result is
|
|
|
|
* that the two cases end up having very little in common.
|
|
|
|
*/
|
|
|
|
if (bp->bio_cmd == BIO_DELETE) {
|
2011-05-13 18:48:00 +00:00
|
|
|
zerosize = ZERO_REGION_SIZE -
|
|
|
|
(ZERO_REGION_SIZE % sc->sectorsize);
|
2011-04-29 21:18:41 +00:00
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
|
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
|
|
auio.uio_rw = UIO_WRITE;
|
|
|
|
auio.uio_td = td;
|
|
|
|
end = bp->bio_offset + bp->bio_length;
|
|
|
|
(void) vn_start_write(vp, &mp, V_WAIT);
|
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
|
|
|
error = 0;
|
|
|
|
while (auio.uio_offset < end) {
|
2011-05-13 18:48:00 +00:00
|
|
|
aiov.iov_base = __DECONST(void *, zero_region);
|
2011-04-29 21:18:41 +00:00
|
|
|
aiov.iov_len = end - auio.uio_offset;
|
|
|
|
if (aiov.iov_len > zerosize)
|
|
|
|
aiov.iov_len = zerosize;
|
|
|
|
auio.uio_resid = aiov.iov_len;
|
|
|
|
error = VOP_WRITE(vp, &auio,
|
|
|
|
sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
|
|
|
|
if (error != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
vn_finished_write(mp);
|
|
|
|
bp->bio_resid = end - auio.uio_offset;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-03-19 14:53:23 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
|
|
|
|
pb = NULL;
|
|
|
|
aiov.iov_base = bp->bio_data;
|
|
|
|
} else {
|
2013-12-10 20:52:31 +00:00
|
|
|
KASSERT(bp->bio_length <= MAXPHYS, ("bio_length %jd",
|
|
|
|
(uintmax_t)bp->bio_length));
|
2013-03-19 14:53:23 +00:00
|
|
|
pb = getpbuf(&md_vnode_pbuf_freecnt);
|
|
|
|
pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n);
|
|
|
|
aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
|
|
|
|
bp->bio_ma_offset);
|
|
|
|
}
|
2004-09-16 19:42:17 +00:00
|
|
|
aiov.iov_len = bp->bio_length;
|
2002-01-21 20:57:03 +00:00
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
2004-09-16 19:42:17 +00:00
|
|
|
auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
|
2002-01-21 20:57:03 +00:00
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
2006-11-01 18:59:06 +00:00
|
|
|
if (bp->bio_cmd == BIO_READ)
|
2002-01-21 20:57:03 +00:00
|
|
|
auio.uio_rw = UIO_READ;
|
2006-11-01 18:59:06 +00:00
|
|
|
else if (bp->bio_cmd == BIO_WRITE)
|
2002-01-21 20:57:03 +00:00
|
|
|
auio.uio_rw = UIO_WRITE;
|
2003-08-05 06:54:44 +00:00
|
|
|
else
|
|
|
|
panic("wrong BIO_OP in mdstart_vnode");
|
2004-09-16 19:42:17 +00:00
|
|
|
auio.uio_resid = bp->bio_length;
|
2006-11-01 18:56:18 +00:00
|
|
|
auio.uio_td = td;
|
2002-01-21 20:57:03 +00:00
|
|
|
/*
|
|
|
|
* When reading set IO_DIRECT to try to avoid double-caching
|
2003-05-31 16:42:45 +00:00
|
|
|
* the data. When writing IO_DIRECT is not optimal.
|
2002-01-21 20:57:03 +00:00
|
|
|
*/
|
|
|
|
if (bp->bio_cmd == BIO_READ) {
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2006-11-01 18:56:18 +00:00
|
|
|
error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2002-01-21 20:57:03 +00:00
|
|
|
} else {
|
2006-11-01 18:56:18 +00:00
|
|
|
(void) vn_start_write(vp, &mp, V_WAIT);
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2006-11-01 18:56:18 +00:00
|
|
|
error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC,
|
|
|
|
sc->cred);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2002-01-21 20:57:03 +00:00
|
|
|
vn_finished_write(mp);
|
|
|
|
}
|
2013-03-19 14:53:23 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
|
|
|
pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n);
|
|
|
|
relpbuf(pb, &md_vnode_pbuf_freecnt);
|
|
|
|
}
|
2002-01-21 20:57:03 +00:00
|
|
|
bp->bio_resid = auio.uio_resid;
|
2002-01-21 20:50:06 +00:00
|
|
|
return (error);
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
|
2002-01-21 20:50:06 +00:00
|
|
|
static int
|
|
|
|
mdstart_swap(struct md_s *sc, struct bio *bp)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
2004-03-19 21:19:15 +00:00
|
|
|
vm_page_t m;
|
|
|
|
u_char *p;
|
2013-03-19 14:53:23 +00:00
|
|
|
vm_pindex_t i, lastp;
|
|
|
|
int rv, ma_offs, offs, len, lastend;
|
2004-03-19 21:19:15 +00:00
|
|
|
|
2006-11-01 18:56:18 +00:00
|
|
|
switch (bp->bio_cmd) {
|
|
|
|
case BIO_READ:
|
|
|
|
case BIO_WRITE:
|
|
|
|
case BIO_DELETE:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
|
|
|
|
2004-03-19 21:19:15 +00:00
|
|
|
p = bp->bio_data;
|
2013-03-19 14:53:23 +00:00
|
|
|
ma_offs = (bp->bio_flags & BIO_UNMAPPED) == 0 ? 0 : bp->bio_ma_offset;
|
2004-03-19 21:19:15 +00:00
|
|
|
|
|
|
|
/*
|
2005-12-28 15:18:18 +00:00
|
|
|
* offs is the offset at which to start operating on the
|
2004-03-19 21:19:15 +00:00
|
|
|
* next (ie, first) page. lastp is the last page on
|
|
|
|
* which we're going to operate. lastend is the ending
|
|
|
|
* position within that last page (ie, PAGE_SIZE if
|
|
|
|
* we're operating on complete aligned pages).
|
|
|
|
*/
|
|
|
|
offs = bp->bio_offset % PAGE_SIZE;
|
|
|
|
lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE;
|
|
|
|
lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
|
2004-02-29 15:58:54 +00:00
|
|
|
|
2004-10-12 04:47:16 +00:00
|
|
|
rv = VM_PAGER_OK;
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WLOCK(sc->object);
|
2004-03-19 21:19:15 +00:00
|
|
|
vm_object_pip_add(sc->object, 1);
|
|
|
|
for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
|
|
|
|
len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
|
2013-08-30 20:12:23 +00:00
|
|
|
m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM);
|
2004-03-19 21:19:15 +00:00
|
|
|
if (bp->bio_cmd == BIO_READ) {
|
2013-05-24 09:48:42 +00:00
|
|
|
if (m->valid == VM_PAGE_BITS_ALL)
|
|
|
|
rv = VM_PAGER_OK;
|
|
|
|
else
|
2004-03-19 21:19:15 +00:00
|
|
|
rv = vm_pager_get_pages(sc->object, &m, 1, 0);
|
2004-10-12 04:47:16 +00:00
|
|
|
if (rv == VM_PAGER_ERROR) {
|
2013-08-09 11:11:11 +00:00
|
|
|
vm_page_xunbusy(m);
|
2004-10-12 04:47:16 +00:00
|
|
|
break;
|
2012-11-08 03:17:41 +00:00
|
|
|
} else if (rv == VM_PAGER_FAIL) {
|
|
|
|
/*
|
|
|
|
* Pager does not have the page. Zero
|
|
|
|
* the allocated page, and mark it as
|
|
|
|
* valid. Do not set dirty, the page
|
|
|
|
* can be recreated if thrown out.
|
|
|
|
*/
|
2013-03-19 14:53:23 +00:00
|
|
|
pmap_zero_page(m);
|
2012-11-08 03:17:41 +00:00
|
|
|
m->valid = VM_PAGE_BITS_ALL;
|
2004-10-12 04:47:16 +00:00
|
|
|
}
|
2013-03-19 14:53:23 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
|
|
|
pmap_copy_pages(&m, offs, bp->bio_ma,
|
|
|
|
ma_offs, len);
|
|
|
|
} else {
|
|
|
|
physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len);
|
|
|
|
cpu_flush_dcache(p, len);
|
|
|
|
}
|
2004-03-19 21:19:15 +00:00
|
|
|
} else if (bp->bio_cmd == BIO_WRITE) {
|
|
|
|
if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
|
|
|
|
rv = vm_pager_get_pages(sc->object, &m, 1, 0);
|
2013-05-24 09:48:42 +00:00
|
|
|
else
|
|
|
|
rv = VM_PAGER_OK;
|
2004-10-12 04:47:16 +00:00
|
|
|
if (rv == VM_PAGER_ERROR) {
|
2013-08-09 11:11:11 +00:00
|
|
|
vm_page_xunbusy(m);
|
2004-10-12 04:47:16 +00:00
|
|
|
break;
|
|
|
|
}
|
2013-03-19 14:53:23 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
|
|
|
pmap_copy_pages(bp->bio_ma, ma_offs, &m,
|
|
|
|
offs, len);
|
|
|
|
} else {
|
|
|
|
physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len);
|
|
|
|
}
|
2004-03-19 21:19:15 +00:00
|
|
|
m->valid = VM_PAGE_BITS_ALL;
|
|
|
|
} else if (bp->bio_cmd == BIO_DELETE) {
|
|
|
|
if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
|
|
|
|
rv = vm_pager_get_pages(sc->object, &m, 1, 0);
|
2013-05-24 09:48:42 +00:00
|
|
|
else
|
|
|
|
rv = VM_PAGER_OK;
|
2004-10-12 04:47:16 +00:00
|
|
|
if (rv == VM_PAGER_ERROR) {
|
2013-08-09 11:11:11 +00:00
|
|
|
vm_page_xunbusy(m);
|
2004-10-12 04:47:16 +00:00
|
|
|
break;
|
|
|
|
}
|
2011-01-27 16:10:25 +00:00
|
|
|
if (len != PAGE_SIZE) {
|
2013-03-19 14:53:23 +00:00
|
|
|
pmap_zero_page_area(m, offs, len);
|
2011-01-27 16:10:25 +00:00
|
|
|
vm_page_clear_dirty(m, offs, len);
|
|
|
|
m->valid = VM_PAGE_BITS_ALL;
|
|
|
|
} else
|
|
|
|
vm_pager_page_unswapped(m);
|
2004-03-19 21:19:15 +00:00
|
|
|
}
|
2013-08-09 11:11:11 +00:00
|
|
|
vm_page_xunbusy(m);
|
2010-05-03 20:31:13 +00:00
|
|
|
vm_page_lock(m);
|
2011-01-27 16:10:25 +00:00
|
|
|
if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE)
|
|
|
|
vm_page_free(m);
|
|
|
|
else
|
|
|
|
vm_page_activate(m);
|
2010-06-15 18:37:31 +00:00
|
|
|
vm_page_unlock(m);
|
2014-07-28 14:27:05 +00:00
|
|
|
if (bp->bio_cmd == BIO_WRITE) {
|
2004-03-19 21:19:15 +00:00
|
|
|
vm_page_dirty(m);
|
2014-07-28 14:27:05 +00:00
|
|
|
vm_pager_page_unswapped(m);
|
|
|
|
}
|
2004-03-19 21:19:15 +00:00
|
|
|
|
|
|
|
/* Actions on further pages start at offset 0 */
|
|
|
|
p += PAGE_SIZE - offs;
|
|
|
|
offs = 0;
|
2013-03-19 14:53:23 +00:00
|
|
|
ma_offs += len;
|
2003-08-05 06:54:44 +00:00
|
|
|
}
|
2014-03-19 01:13:42 +00:00
|
|
|
vm_object_pip_wakeup(sc->object);
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WUNLOCK(sc->object);
|
2004-10-12 04:47:16 +00:00
|
|
|
return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
|
2013-12-04 07:38:23 +00:00
|
|
|
static int
|
|
|
|
mdstart_null(struct md_s *sc, struct bio *bp)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (bp->bio_cmd) {
|
|
|
|
case BIO_READ:
|
|
|
|
bzero(bp->bio_data, bp->bio_length);
|
|
|
|
cpu_flush_dcache(bp->bio_data, bp->bio_length);
|
|
|
|
break;
|
|
|
|
case BIO_WRITE:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
bp->bio_resid = 0;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2002-06-03 22:09:04 +00:00
|
|
|
static void
|
|
|
|
md_kthread(void *arg)
|
|
|
|
{
|
|
|
|
struct md_s *sc;
|
|
|
|
struct bio *bp;
|
2006-03-28 21:25:11 +00:00
|
|
|
int error;
|
2002-06-03 22:09:04 +00:00
|
|
|
|
|
|
|
sc = arg;
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_lock(curthread);
|
2004-12-30 20:29:58 +00:00
|
|
|
sched_prio(curthread, PRIBIO);
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_unlock(curthread);
|
2006-12-14 11:34:07 +00:00
|
|
|
if (sc->type == MD_VNODE)
|
|
|
|
curthread->td_pflags |= TDP_NORUNNINGBUF;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2002-01-21 20:50:06 +00:00
|
|
|
for (;;) {
|
2006-03-28 21:25:11 +00:00
|
|
|
mtx_lock(&sc->queue_mtx);
|
2005-09-19 06:55:27 +00:00
|
|
|
if (sc->flags & MD_SHUTDOWN) {
|
2006-03-28 21:25:11 +00:00
|
|
|
sc->flags |= MD_EXITING;
|
|
|
|
mtx_unlock(&sc->queue_mtx);
|
2007-10-20 23:23:23 +00:00
|
|
|
kproc_exit(0);
|
2005-09-19 06:55:27 +00:00
|
|
|
}
|
2004-09-07 07:54:45 +00:00
|
|
|
bp = bioq_takefirst(&sc->bio_queue);
|
2002-06-03 22:09:04 +00:00
|
|
|
if (!bp) {
|
2003-01-13 08:50:23 +00:00
|
|
|
msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0);
|
2002-06-03 22:09:04 +00:00
|
|
|
continue;
|
|
|
|
}
|
2003-01-13 08:50:23 +00:00
|
|
|
mtx_unlock(&sc->queue_mtx);
|
2003-04-09 11:59:29 +00:00
|
|
|
if (bp->bio_cmd == BIO_GETATTR) {
|
2010-12-29 12:11:07 +00:00
|
|
|
if ((sc->fwsectors && sc->fwheads &&
|
2003-04-09 11:59:29 +00:00
|
|
|
(g_handleattr_int(bp, "GEOM::fwsectors",
|
|
|
|
sc->fwsectors) ||
|
|
|
|
g_handleattr_int(bp, "GEOM::fwheads",
|
2010-12-29 12:11:07 +00:00
|
|
|
sc->fwheads))) ||
|
|
|
|
g_handleattr_int(bp, "GEOM::candelete", 1))
|
2003-04-09 11:59:29 +00:00
|
|
|
error = -1;
|
|
|
|
else
|
|
|
|
error = EOPNOTSUPP;
|
|
|
|
} else {
|
2005-09-19 06:55:27 +00:00
|
|
|
error = sc->start(sc, bp);
|
2002-01-21 20:50:06 +00:00
|
|
|
}
|
|
|
|
|
2003-01-12 21:16:49 +00:00
|
|
|
if (error != -1) {
|
|
|
|
bp->bio_completed = bp->bio_length;
|
2007-11-07 22:47:41 +00:00
|
|
|
if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
|
|
|
|
devstat_end_transaction_bio(sc->devstat, bp);
|
2011-01-25 14:00:30 +00:00
|
|
|
g_io_deliver(bp, error);
|
2003-01-13 20:19:04 +00:00
|
|
|
}
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-11-26 20:08:44 +00:00
|
|
|
static struct md_s *
|
2000-12-31 13:03:42 +00:00
|
|
|
mdfind(int unit)
|
1999-09-21 11:00:49 +00:00
|
|
|
{
|
|
|
|
struct md_s *sc;
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
LIST_FOREACH(sc, &md_softc_list, list) {
|
|
|
|
if (sc->unit == unit)
|
2000-12-31 13:03:42 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return (sc);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct md_s *
|
2005-10-06 19:47:04 +00:00
|
|
|
mdnew(int unit, int *errp, enum md_types type)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
2010-07-22 10:24:28 +00:00
|
|
|
struct md_s *sc;
|
|
|
|
int error;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2005-09-19 06:55:27 +00:00
|
|
|
*errp = 0;
|
2000-12-31 13:03:42 +00:00
|
|
|
if (unit == -1)
|
2010-07-22 10:24:28 +00:00
|
|
|
unit = alloc_unr(md_uh);
|
|
|
|
else
|
|
|
|
unit = alloc_unr_specific(md_uh, unit);
|
|
|
|
|
|
|
|
if (unit == -1) {
|
|
|
|
*errp = EBUSY;
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2005-09-19 06:55:27 +00:00
|
|
|
sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
|
2005-10-06 19:47:04 +00:00
|
|
|
sc->type = type;
|
2005-09-19 06:55:27 +00:00
|
|
|
bioq_init(&sc->bio_queue);
|
|
|
|
mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF);
|
2000-08-20 21:34:39 +00:00
|
|
|
sc->unit = unit;
|
2002-05-26 06:48:55 +00:00
|
|
|
sprintf(sc->name, "md%d", unit);
|
2005-08-31 19:45:11 +00:00
|
|
|
LIST_INSERT_HEAD(&md_softc_list, sc, list);
|
2007-10-20 23:23:23 +00:00
|
|
|
error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name);
|
2005-09-19 06:55:27 +00:00
|
|
|
if (error == 0)
|
|
|
|
return (sc);
|
|
|
|
LIST_REMOVE(sc, list);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_destroy(&sc->stat_mtx);
|
2005-09-19 06:55:27 +00:00
|
|
|
mtx_destroy(&sc->queue_mtx);
|
2010-07-22 10:24:28 +00:00
|
|
|
free_unr(md_uh, sc->unit);
|
2005-09-19 06:55:27 +00:00
|
|
|
free(sc, M_MD);
|
|
|
|
*errp = error;
|
|
|
|
return (NULL);
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
mdinit(struct md_s *sc)
|
|
|
|
{
|
2003-01-12 21:16:49 +00:00
|
|
|
struct g_geom *gp;
|
|
|
|
struct g_provider *pp;
|
|
|
|
|
|
|
|
g_topology_lock();
|
|
|
|
gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
|
|
|
|
gp->softc = sc;
|
|
|
|
pp = g_new_providerf(gp, "md%d", sc->unit);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
|
2004-09-16 21:32:13 +00:00
|
|
|
pp->mediasize = sc->mediasize;
|
|
|
|
pp->sectorsize = sc->sectorsize;
|
2013-04-02 19:39:31 +00:00
|
|
|
switch (sc->type) {
|
|
|
|
case MD_MALLOC:
|
|
|
|
case MD_VNODE:
|
|
|
|
case MD_SWAP:
|
|
|
|
pp->flags |= G_PF_ACCEPT_UNMAPPED;
|
|
|
|
break;
|
|
|
|
case MD_PRELOAD:
|
2013-12-04 07:38:23 +00:00
|
|
|
case MD_NULL:
|
2013-04-02 19:39:31 +00:00
|
|
|
break;
|
|
|
|
}
|
2003-01-12 21:16:49 +00:00
|
|
|
sc->gp = gp;
|
|
|
|
sc->pp = pp;
|
|
|
|
g_error_provider(pp, 0);
|
|
|
|
g_topology_unlock();
|
2007-11-07 22:47:41 +00:00
|
|
|
sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize,
|
|
|
|
DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
|
1999-11-26 20:08:44 +00:00
|
|
|
}
|
|
|
|
|
2000-12-31 13:03:42 +00:00
|
|
|
static int
|
2004-09-16 21:32:13 +00:00
|
|
|
mdcreate_malloc(struct md_s *sc, struct md_ioctl *mdio)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
2002-05-25 20:44:20 +00:00
|
|
|
uintptr_t sp;
|
|
|
|
int error;
|
2004-09-16 21:32:13 +00:00
|
|
|
off_t u;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2002-05-25 20:44:20 +00:00
|
|
|
error = 0;
|
2000-12-31 13:03:42 +00:00
|
|
|
if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
|
2001-08-27 13:25:47 +00:00
|
|
|
return (EINVAL);
|
2004-09-16 21:32:13 +00:00
|
|
|
if (mdio->md_sectorsize != 0 && !powerof2(mdio->md_sectorsize))
|
2003-03-03 13:05:00 +00:00
|
|
|
return (EINVAL);
|
2000-12-31 13:03:42 +00:00
|
|
|
/* Compression doesn't make sense if we have reserved space */
|
|
|
|
if (mdio->md_options & MD_RESERVE)
|
|
|
|
mdio->md_options &= ~MD_COMPRESS;
|
2003-04-09 11:59:29 +00:00
|
|
|
if (mdio->md_fwsectors != 0)
|
|
|
|
sc->fwsectors = mdio->md_fwsectors;
|
|
|
|
if (mdio->md_fwheads != 0)
|
|
|
|
sc->fwheads = mdio->md_fwheads;
|
2001-08-07 19:23:16 +00:00
|
|
|
sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
|
2004-09-16 21:32:13 +00:00
|
|
|
sc->indir = dimension(sc->mediasize / sc->sectorsize);
|
|
|
|
sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL,
|
|
|
|
0x1ff, 0);
|
2001-01-21 22:57:56 +00:00
|
|
|
if (mdio->md_options & MD_RESERVE) {
|
2004-09-16 21:32:13 +00:00
|
|
|
off_t nsectors;
|
|
|
|
|
|
|
|
nsectors = sc->mediasize / sc->sectorsize;
|
|
|
|
for (u = 0; u < nsectors; u++) {
|
2011-01-19 16:48:07 +00:00
|
|
|
sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ?
|
|
|
|
M_WAITOK : M_NOWAIT) | M_ZERO);
|
2002-05-25 20:44:20 +00:00
|
|
|
if (sp != 0)
|
2002-05-26 09:38:51 +00:00
|
|
|
error = s_write(sc->indir, u, sp);
|
2002-05-25 20:44:20 +00:00
|
|
|
else
|
|
|
|
error = ENOMEM;
|
2004-09-16 21:32:13 +00:00
|
|
|
if (error != 0)
|
2002-05-25 20:44:20 +00:00
|
|
|
break;
|
|
|
|
}
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
2004-09-16 21:32:13 +00:00
|
|
|
return (error);
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
mdsetcred(struct md_s *sc, struct ucred *cred)
|
|
|
|
{
|
|
|
|
char *tmpbuf;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set credits in our softc
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (sc->cred)
|
|
|
|
crfree(sc->cred);
|
2001-10-11 23:38:17 +00:00
|
|
|
sc->cred = crhold(cred);
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Horrible kludge to establish credentials for NFS XXX.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (sc->vnode) {
|
|
|
|
struct uio auio;
|
|
|
|
struct iovec aiov;
|
|
|
|
|
2004-09-16 21:32:13 +00:00
|
|
|
tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK);
|
2000-12-31 13:03:42 +00:00
|
|
|
bzero(&auio, sizeof(auio));
|
|
|
|
|
|
|
|
aiov.iov_base = tmpbuf;
|
2004-09-16 21:32:13 +00:00
|
|
|
aiov.iov_len = sc->sectorsize;
|
2000-12-31 13:03:42 +00:00
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
auio.uio_offset = 0;
|
|
|
|
auio.uio_rw = UIO_READ;
|
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
|
|
auio.uio_resid = aiov.iov_len;
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
|
2000-12-31 13:03:42 +00:00
|
|
|
error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(sc->vnode, 0);
|
2000-12-31 13:03:42 +00:00
|
|
|
free(tmpbuf, M_TEMP);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2004-09-16 21:32:13 +00:00
|
|
|
mdcreate_vnode(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
|
1999-11-20 22:43:27 +00:00
|
|
|
{
|
2000-12-31 13:03:42 +00:00
|
|
|
struct vattr vattr;
|
|
|
|
struct nameidata nd;
|
2010-10-18 04:26:32 +00:00
|
|
|
char *fname;
|
2012-10-22 17:50:54 +00:00
|
|
|
int error, flags;
|
1999-11-20 22:43:27 +00:00
|
|
|
|
2005-08-17 01:24:55 +00:00
|
|
|
/*
|
2010-10-18 04:26:32 +00:00
|
|
|
* Kernel-originated requests must have the filename appended
|
|
|
|
* to the mdio structure to protect against malicious software.
|
|
|
|
*/
|
|
|
|
fname = mdio->md_file;
|
|
|
|
if ((void *)fname != (void *)(mdio + 1)) {
|
|
|
|
error = copyinstr(fname, sc->file, sizeof(sc->file), NULL);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
} else
|
|
|
|
strlcpy(sc->file, fname, sizeof(sc->file));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the user specified that this is a read only device, don't
|
|
|
|
* set the FWRITE mask before trying to open the backing store.
|
2005-08-17 01:24:55 +00:00
|
|
|
*/
|
2010-10-18 04:26:32 +00:00
|
|
|
flags = FREAD | ((mdio->md_options & MD_READONLY) ? 0 : FWRITE);
|
2012-10-22 17:50:54 +00:00
|
|
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td);
|
2007-05-31 11:51:53 +00:00
|
|
|
error = vn_open(&nd, &flags, 0, NULL);
|
2004-11-06 13:16:35 +00:00
|
|
|
if (error != 0)
|
2004-09-14 18:43:24 +00:00
|
|
|
return (error);
|
2006-01-27 11:27:55 +00:00
|
|
|
NDFREE(&nd, NDF_ONLY_PNBUF);
|
2009-03-11 14:13:47 +00:00
|
|
|
if (nd.ni_vp->v_type != VREG) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto bad;
|
2010-07-26 10:37:14 +00:00
|
|
|
}
|
2009-03-11 14:13:47 +00:00
|
|
|
error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred);
|
|
|
|
if (error != 0)
|
|
|
|
goto bad;
|
|
|
|
if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) {
|
|
|
|
vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY);
|
|
|
|
if (nd.ni_vp->v_iflag & VI_DOOMED) {
|
|
|
|
/* Forced unmount. */
|
|
|
|
error = EBADF;
|
|
|
|
goto bad;
|
|
|
|
}
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
2006-12-14 11:34:07 +00:00
|
|
|
nd.ni_vp->v_vflag |= VV_MD;
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(nd.ni_vp, 0);
|
2002-05-03 17:55:10 +00:00
|
|
|
|
2004-01-12 10:52:00 +00:00
|
|
|
if (mdio->md_fwsectors != 0)
|
|
|
|
sc->fwsectors = mdio->md_fwsectors;
|
|
|
|
if (mdio->md_fwheads != 0)
|
|
|
|
sc->fwheads = mdio->md_fwheads;
|
2004-03-10 20:41:09 +00:00
|
|
|
sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC);
|
2002-05-03 17:55:10 +00:00
|
|
|
if (!(flags & FWRITE))
|
|
|
|
sc->flags |= MD_READONLY;
|
2000-12-31 13:03:42 +00:00
|
|
|
sc->vnode = nd.ni_vp;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2002-02-27 18:32:23 +00:00
|
|
|
error = mdsetcred(sc, td->td_ucred);
|
2004-09-16 21:32:13 +00:00
|
|
|
if (error != 0) {
|
2008-02-28 18:31:54 +00:00
|
|
|
sc->vnode = NULL;
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
|
2006-12-14 11:34:07 +00:00
|
|
|
nd.ni_vp->v_vflag &= ~VV_MD;
|
2009-03-11 14:13:47 +00:00
|
|
|
goto bad;
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
return (0);
|
2009-03-11 14:13:47 +00:00
|
|
|
bad:
|
|
|
|
VOP_UNLOCK(nd.ni_vp, 0);
|
|
|
|
(void)vn_close(nd.ni_vp, flags, td->td_ucred, td);
|
|
|
|
return (error);
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
1999-09-21 11:00:49 +00:00
|
|
|
|
2000-12-31 13:03:42 +00:00
|
|
|
static int
|
2001-09-12 08:38:13 +00:00
|
|
|
mddestroy(struct md_s *sc, struct thread *td)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
2001-07-04 16:20:28 +00:00
|
|
|
|
2003-01-12 21:16:49 +00:00
|
|
|
if (sc->gp) {
|
|
|
|
sc->gp->softc = NULL;
|
2005-09-19 06:55:27 +00:00
|
|
|
g_topology_lock();
|
|
|
|
g_wither_geom(sc->gp, ENXIO);
|
|
|
|
g_topology_unlock();
|
2003-05-02 06:18:58 +00:00
|
|
|
sc->gp = NULL;
|
|
|
|
sc->pp = NULL;
|
2001-05-06 17:17:23 +00:00
|
|
|
}
|
2007-11-07 22:47:41 +00:00
|
|
|
if (sc->devstat) {
|
|
|
|
devstat_remove_entry(sc->devstat);
|
|
|
|
sc->devstat = NULL;
|
|
|
|
}
|
2006-03-28 21:25:11 +00:00
|
|
|
mtx_lock(&sc->queue_mtx);
|
2002-06-03 22:09:04 +00:00
|
|
|
sc->flags |= MD_SHUTDOWN;
|
|
|
|
wakeup(sc);
|
2006-03-28 21:25:11 +00:00
|
|
|
while (!(sc->flags & MD_EXITING))
|
|
|
|
msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
|
|
|
|
mtx_unlock(&sc->queue_mtx);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_destroy(&sc->stat_mtx);
|
2005-09-11 12:35:32 +00:00
|
|
|
mtx_destroy(&sc->queue_mtx);
|
2005-09-19 06:55:27 +00:00
|
|
|
if (sc->vnode != NULL) {
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
|
2006-12-14 11:34:07 +00:00
|
|
|
sc->vnode->v_vflag &= ~VV_MD;
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(sc->vnode, 0);
|
2001-08-27 13:25:47 +00:00
|
|
|
(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
|
2001-09-12 08:38:13 +00:00
|
|
|
FREAD : (FREAD|FWRITE), sc->cred, td);
|
2005-09-19 06:55:27 +00:00
|
|
|
}
|
2000-12-31 13:03:42 +00:00
|
|
|
if (sc->cred != NULL)
|
|
|
|
crfree(sc->cred);
|
2005-01-22 19:56:03 +00:00
|
|
|
if (sc->object != NULL)
|
2003-05-16 07:28:27 +00:00
|
|
|
vm_object_deallocate(sc->object);
|
2002-05-26 06:48:55 +00:00
|
|
|
if (sc->indir)
|
|
|
|
destroy_indir(sc, sc->indir);
|
|
|
|
if (sc->uma)
|
|
|
|
uma_zdestroy(sc->uma);
|
2001-05-06 17:17:23 +00:00
|
|
|
|
|
|
|
LIST_REMOVE(sc, list);
|
2010-07-22 10:24:28 +00:00
|
|
|
free_unr(md_uh, sc->unit);
|
2002-05-25 20:44:20 +00:00
|
|
|
free(sc, M_MD);
|
2000-12-31 13:03:42 +00:00
|
|
|
return (0);
|
1999-09-21 11:00:49 +00:00
|
|
|
}
|
|
|
|
|
2012-07-07 20:32:21 +00:00
|
|
|
static int
|
|
|
|
mdresize(struct md_s *sc, struct md_ioctl *mdio)
|
|
|
|
{
|
|
|
|
int error, res;
|
|
|
|
vm_pindex_t oldpages, newpages;
|
|
|
|
|
|
|
|
switch (sc->type) {
|
|
|
|
case MD_VNODE:
|
2013-12-04 07:38:23 +00:00
|
|
|
case MD_NULL:
|
2012-07-07 20:32:21 +00:00
|
|
|
break;
|
|
|
|
case MD_SWAP:
|
2012-08-02 15:05:34 +00:00
|
|
|
if (mdio->md_mediasize <= 0 ||
|
2012-07-07 20:32:21 +00:00
|
|
|
(mdio->md_mediasize % PAGE_SIZE) != 0)
|
|
|
|
return (EDOM);
|
|
|
|
oldpages = OFF_TO_IDX(round_page(sc->mediasize));
|
|
|
|
newpages = OFF_TO_IDX(round_page(mdio->md_mediasize));
|
|
|
|
if (newpages < oldpages) {
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WLOCK(sc->object);
|
2012-07-07 20:32:21 +00:00
|
|
|
vm_object_page_remove(sc->object, newpages, 0, 0);
|
|
|
|
swap_pager_freespace(sc->object, newpages,
|
|
|
|
oldpages - newpages);
|
|
|
|
swap_release_by_cred(IDX_TO_OFF(oldpages -
|
|
|
|
newpages), sc->cred);
|
|
|
|
sc->object->charge = IDX_TO_OFF(newpages);
|
|
|
|
sc->object->size = newpages;
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WUNLOCK(sc->object);
|
2012-07-07 20:32:21 +00:00
|
|
|
} else if (newpages > oldpages) {
|
|
|
|
res = swap_reserve_by_cred(IDX_TO_OFF(newpages -
|
|
|
|
oldpages), sc->cred);
|
|
|
|
if (!res)
|
|
|
|
return (ENOMEM);
|
|
|
|
if ((mdio->md_options & MD_RESERVE) ||
|
|
|
|
(sc->flags & MD_RESERVE)) {
|
|
|
|
error = swap_pager_reserve(sc->object,
|
|
|
|
oldpages, newpages - oldpages);
|
|
|
|
if (error < 0) {
|
|
|
|
swap_release_by_cred(
|
|
|
|
IDX_TO_OFF(newpages - oldpages),
|
|
|
|
sc->cred);
|
|
|
|
return (EDOM);
|
|
|
|
}
|
|
|
|
}
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WLOCK(sc->object);
|
2012-07-07 20:32:21 +00:00
|
|
|
sc->object->charge = IDX_TO_OFF(newpages);
|
|
|
|
sc->object->size = newpages;
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WUNLOCK(sc->object);
|
2012-07-07 20:32:21 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
|
|
|
|
|
|
|
sc->mediasize = mdio->md_mediasize;
|
|
|
|
g_topology_lock();
|
|
|
|
g_resize_provider(sc->pp, sc->mediasize);
|
|
|
|
g_topology_unlock();
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2000-12-31 13:03:42 +00:00
|
|
|
static int
|
2004-09-16 21:32:13 +00:00
|
|
|
mdcreate_swap(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
2004-09-16 20:38:11 +00:00
|
|
|
vm_ooffset_t npage;
|
|
|
|
int error;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
/*
|
2013-12-04 09:48:52 +00:00
|
|
|
* Range check. Disallow negative sizes and sizes not being
|
|
|
|
* multiple of page size.
|
2000-08-20 21:34:39 +00:00
|
|
|
*/
|
2012-08-02 15:05:34 +00:00
|
|
|
if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
|
2001-08-27 13:25:47 +00:00
|
|
|
return (EDOM);
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate an OBJT_SWAP object.
|
|
|
|
*
|
|
|
|
* Note the truncation.
|
|
|
|
*/
|
|
|
|
|
2004-09-16 21:32:13 +00:00
|
|
|
npage = mdio->md_mediasize / PAGE_SIZE;
|
2004-03-02 20:13:23 +00:00
|
|
|
if (mdio->md_fwsectors != 0)
|
|
|
|
sc->fwsectors = mdio->md_fwsectors;
|
|
|
|
if (mdio->md_fwheads != 0)
|
|
|
|
sc->fwheads = mdio->md_fwheads;
|
2004-09-16 20:38:11 +00:00
|
|
|
sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage,
|
Implement global and per-uid accounting of the anonymous memory. Add
rlimit RLIMIT_SWAP that limits the amount of swap that may be reserved
for the uid.
The accounting information (charge) is associated with either map entry,
or vm object backing the entry, assuming the object is the first one
in the shadow chain and entry does not require COW. Charge is moved
from entry to object on allocation of the object, e.g. during the mmap,
assuming the object is allocated, or on the first page fault on the
entry. It moves back to the entry on forks due to COW setup.
The per-entry granularity of accounting makes the charge process fair
for processes that change uid during lifetime, and decrements charge
for proper uid when region is unmapped.
The interface of vm_pager_allocate(9) is extended by adding struct ucred *,
that is used to charge appropriate uid when allocation if performed by
kernel, e.g. md(4).
Several syscalls, among them is fork(2), may now return ENOMEM when
global or per-uid limits are enforced.
In collaboration with: pho
Reviewed by: alc
Approved by: re (kensmith)
2009-06-23 20:45:22 +00:00
|
|
|
VM_PROT_DEFAULT, 0, td->td_ucred);
|
2004-10-12 04:47:16 +00:00
|
|
|
if (sc->object == NULL)
|
|
|
|
return (ENOMEM);
|
2012-07-07 20:32:21 +00:00
|
|
|
sc->flags = mdio->md_options & (MD_FORCE | MD_RESERVE);
|
2000-12-31 13:03:42 +00:00
|
|
|
if (mdio->md_options & MD_RESERVE) {
|
2004-09-16 20:38:11 +00:00
|
|
|
if (swap_pager_reserve(sc->object, 0, npage) < 0) {
|
Implement global and per-uid accounting of the anonymous memory. Add
rlimit RLIMIT_SWAP that limits the amount of swap that may be reserved
for the uid.
The accounting information (charge) is associated with either map entry,
or vm object backing the entry, assuming the object is the first one
in the shadow chain and entry does not require COW. Charge is moved
from entry to object on allocation of the object, e.g. during the mmap,
assuming the object is allocated, or on the first page fault on the
entry. It moves back to the entry on forks due to COW setup.
The per-entry granularity of accounting makes the charge process fair
for processes that change uid during lifetime, and decrements charge
for proper uid when region is unmapped.
The interface of vm_pager_allocate(9) is extended by adding struct ucred *,
that is used to charge appropriate uid when allocation if performed by
kernel, e.g. md(4).
Several syscalls, among them is fork(2), may now return ENOMEM when
global or per-uid limits are enforced.
In collaboration with: pho
Reviewed by: alc
Approved by: re (kensmith)
2009-06-23 20:45:22 +00:00
|
|
|
error = EDOM;
|
|
|
|
goto finish;
|
2000-12-31 13:03:42 +00:00
|
|
|
}
|
|
|
|
}
|
2002-02-27 18:32:23 +00:00
|
|
|
error = mdsetcred(sc, td->td_ucred);
|
Implement global and per-uid accounting of the anonymous memory. Add
rlimit RLIMIT_SWAP that limits the amount of swap that may be reserved
for the uid.
The accounting information (charge) is associated with either map entry,
or vm object backing the entry, assuming the object is the first one
in the shadow chain and entry does not require COW. Charge is moved
from entry to object on allocation of the object, e.g. during the mmap,
assuming the object is allocated, or on the first page fault on the
entry. It moves back to the entry on forks due to COW setup.
The per-entry granularity of accounting makes the charge process fair
for processes that change uid during lifetime, and decrements charge
for proper uid when region is unmapped.
The interface of vm_pager_allocate(9) is extended by adding struct ucred *,
that is used to charge appropriate uid when allocation if performed by
kernel, e.g. md(4).
Several syscalls, among them is fork(2), may now return ENOMEM when
global or per-uid limits are enforced.
In collaboration with: pho
Reviewed by: alc
Approved by: re (kensmith)
2009-06-23 20:45:22 +00:00
|
|
|
finish:
|
2004-11-06 13:16:35 +00:00
|
|
|
if (error != 0) {
|
2004-09-14 19:55:07 +00:00
|
|
|
vm_object_deallocate(sc->object);
|
|
|
|
sc->object = NULL;
|
2003-02-11 12:35:44 +00:00
|
|
|
}
|
2004-09-16 21:32:13 +00:00
|
|
|
return (error);
|
2001-08-27 13:25:47 +00:00
|
|
|
}
|
|
|
|
|
2013-12-04 07:38:23 +00:00
|
|
|
static int
|
|
|
|
mdcreate_null(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
2013-12-04 09:48:52 +00:00
|
|
|
* Range check. Disallow negative sizes and sizes not being
|
|
|
|
* multiple of page size.
|
2013-12-04 07:38:23 +00:00
|
|
|
*/
|
|
|
|
if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
|
|
|
|
return (EDOM);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
static int
|
2005-09-19 06:55:27 +00:00
|
|
|
xmdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
|
2000-12-31 13:03:42 +00:00
|
|
|
{
|
|
|
|
struct md_ioctl *mdio;
|
|
|
|
struct md_s *sc;
|
2004-09-16 21:32:13 +00:00
|
|
|
int error, i;
|
2012-08-02 15:05:34 +00:00
|
|
|
unsigned sectsize;
|
2000-12-31 13:03:42 +00:00
|
|
|
|
|
|
|
if (md_debug)
|
|
|
|
printf("mdctlioctl(%s %lx %p %x %p)\n",
|
2001-09-12 08:38:13 +00:00
|
|
|
devtoname(dev), cmd, addr, flags, td);
|
2000-12-31 13:03:42 +00:00
|
|
|
|
2005-09-19 06:55:27 +00:00
|
|
|
mdio = (struct md_ioctl *)addr;
|
|
|
|
if (mdio->md_version != MDIOVERSION)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2001-12-20 06:38:21 +00:00
|
|
|
/*
|
|
|
|
* We assert the version number in the individual ioctl
|
|
|
|
* handlers instead of out here because (a) it is possible we
|
|
|
|
* may add another ioctl in the future which doesn't read an
|
|
|
|
* mdio, and (b) the correct return value for an unknown ioctl
|
|
|
|
* is ENOIOCTL, not EINVAL.
|
|
|
|
*/
|
2005-09-19 06:55:27 +00:00
|
|
|
error = 0;
|
2000-12-31 13:03:42 +00:00
|
|
|
switch (cmd) {
|
|
|
|
case MDIOCATTACH:
|
|
|
|
switch (mdio->md_type) {
|
|
|
|
case MD_MALLOC:
|
|
|
|
case MD_PRELOAD:
|
|
|
|
case MD_VNODE:
|
|
|
|
case MD_SWAP:
|
2013-12-04 07:38:23 +00:00
|
|
|
case MD_NULL:
|
2004-09-16 21:32:13 +00:00
|
|
|
break;
|
2000-12-31 13:03:42 +00:00
|
|
|
default:
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
2012-08-02 15:05:34 +00:00
|
|
|
if (mdio->md_sectorsize == 0)
|
|
|
|
sectsize = DEV_BSIZE;
|
|
|
|
else
|
|
|
|
sectsize = mdio->md_sectorsize;
|
|
|
|
if (sectsize > MAXPHYS || mdio->md_mediasize < sectsize)
|
|
|
|
return (EINVAL);
|
2005-08-31 19:45:11 +00:00
|
|
|
if (mdio->md_options & MD_AUTOUNIT)
|
2005-10-06 19:47:04 +00:00
|
|
|
sc = mdnew(-1, &error, mdio->md_type);
|
2010-07-22 10:24:28 +00:00
|
|
|
else {
|
|
|
|
if (mdio->md_unit > INT_MAX)
|
|
|
|
return (EINVAL);
|
2005-10-06 19:47:04 +00:00
|
|
|
sc = mdnew(mdio->md_unit, &error, mdio->md_type);
|
2010-07-22 10:24:28 +00:00
|
|
|
}
|
2005-08-31 19:45:11 +00:00
|
|
|
if (sc == NULL)
|
|
|
|
return (error);
|
|
|
|
if (mdio->md_options & MD_AUTOUNIT)
|
2004-09-16 21:32:13 +00:00
|
|
|
mdio->md_unit = sc->unit;
|
|
|
|
sc->mediasize = mdio->md_mediasize;
|
2012-08-02 15:05:34 +00:00
|
|
|
sc->sectorsize = sectsize;
|
2004-09-16 21:32:13 +00:00
|
|
|
error = EDOOFUS;
|
|
|
|
switch (sc->type) {
|
|
|
|
case MD_MALLOC:
|
2005-09-19 06:55:27 +00:00
|
|
|
sc->start = mdstart_malloc;
|
2004-09-16 21:32:13 +00:00
|
|
|
error = mdcreate_malloc(sc, mdio);
|
|
|
|
break;
|
|
|
|
case MD_PRELOAD:
|
2012-11-21 16:56:47 +00:00
|
|
|
/*
|
|
|
|
* We disallow attaching preloaded memory disks via
|
|
|
|
* ioctl. Preloaded memory disks are automatically
|
|
|
|
* attached in g_md_init().
|
|
|
|
*/
|
|
|
|
error = EOPNOTSUPP;
|
2004-09-16 21:32:13 +00:00
|
|
|
break;
|
|
|
|
case MD_VNODE:
|
2005-09-19 06:55:27 +00:00
|
|
|
sc->start = mdstart_vnode;
|
2004-09-16 21:32:13 +00:00
|
|
|
error = mdcreate_vnode(sc, mdio, td);
|
|
|
|
break;
|
|
|
|
case MD_SWAP:
|
2005-09-19 06:55:27 +00:00
|
|
|
sc->start = mdstart_swap;
|
2004-09-16 21:32:13 +00:00
|
|
|
error = mdcreate_swap(sc, mdio, td);
|
|
|
|
break;
|
2013-12-04 07:38:23 +00:00
|
|
|
case MD_NULL:
|
|
|
|
sc->start = mdstart_null;
|
|
|
|
error = mdcreate_null(sc, mdio, td);
|
|
|
|
break;
|
2004-09-16 21:32:13 +00:00
|
|
|
}
|
|
|
|
if (error != 0) {
|
|
|
|
mddestroy(sc, td);
|
|
|
|
return (error);
|
|
|
|
}
|
2005-09-19 06:55:27 +00:00
|
|
|
|
|
|
|
/* Prune off any residual fractional sector */
|
|
|
|
i = sc->mediasize % sc->sectorsize;
|
|
|
|
sc->mediasize -= i;
|
|
|
|
|
2004-09-16 21:32:13 +00:00
|
|
|
mdinit(sc);
|
|
|
|
return (0);
|
2000-12-31 13:03:42 +00:00
|
|
|
case MDIOCDETACH:
|
2009-01-10 17:17:18 +00:00
|
|
|
if (mdio->md_mediasize != 0 ||
|
|
|
|
(mdio->md_options & ~MD_FORCE) != 0)
|
2001-08-27 13:25:47 +00:00
|
|
|
return (EINVAL);
|
2005-09-19 06:55:27 +00:00
|
|
|
|
|
|
|
sc = mdfind(mdio->md_unit);
|
|
|
|
if (sc == NULL)
|
|
|
|
return (ENOENT);
|
2009-01-10 17:17:18 +00:00
|
|
|
if (sc->opencount != 0 && !(sc->flags & MD_FORCE) &&
|
|
|
|
!(mdio->md_options & MD_FORCE))
|
2005-09-19 06:55:27 +00:00
|
|
|
return (EBUSY);
|
|
|
|
return (mddestroy(sc, td));
|
2012-07-07 20:32:21 +00:00
|
|
|
case MDIOCRESIZE:
|
|
|
|
if ((mdio->md_options & ~(MD_FORCE | MD_RESERVE)) != 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
sc = mdfind(mdio->md_unit);
|
|
|
|
if (sc == NULL)
|
|
|
|
return (ENOENT);
|
2012-08-02 15:05:34 +00:00
|
|
|
if (mdio->md_mediasize < sc->sectorsize)
|
|
|
|
return (EINVAL);
|
2012-07-07 20:32:21 +00:00
|
|
|
if (mdio->md_mediasize < sc->mediasize &&
|
|
|
|
!(sc->flags & MD_FORCE) &&
|
|
|
|
!(mdio->md_options & MD_FORCE))
|
|
|
|
return (EBUSY);
|
|
|
|
return (mdresize(sc, mdio));
|
2001-02-25 13:12:57 +00:00
|
|
|
case MDIOCQUERY:
|
|
|
|
sc = mdfind(mdio->md_unit);
|
|
|
|
if (sc == NULL)
|
|
|
|
return (ENOENT);
|
|
|
|
mdio->md_type = sc->type;
|
|
|
|
mdio->md_options = sc->flags;
|
2004-09-16 21:32:13 +00:00
|
|
|
mdio->md_mediasize = sc->mediasize;
|
|
|
|
mdio->md_sectorsize = sc->sectorsize;
|
2005-09-19 06:55:27 +00:00
|
|
|
if (sc->type == MD_VNODE)
|
2004-12-27 17:20:06 +00:00
|
|
|
error = copyout(sc->file, mdio->md_file,
|
|
|
|
strlen(sc->file) + 1);
|
2005-09-19 06:55:27 +00:00
|
|
|
return (error);
|
2003-01-27 07:58:18 +00:00
|
|
|
case MDIOCLIST:
|
|
|
|
i = 1;
|
|
|
|
LIST_FOREACH(sc, &md_softc_list, list) {
|
|
|
|
if (i == MDNPAD - 1)
|
|
|
|
mdio->md_pad[i] = -1;
|
|
|
|
else
|
|
|
|
mdio->md_pad[i++] = sc->unit;
|
|
|
|
}
|
|
|
|
mdio->md_pad[0] = i - 1;
|
|
|
|
return (0);
|
2000-12-31 13:03:42 +00:00
|
|
|
default:
|
|
|
|
return (ENOIOCTL);
|
|
|
|
};
|
2005-09-19 06:55:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
|
|
|
|
{
|
2010-07-26 10:37:14 +00:00
|
|
|
int error;
|
2005-09-19 06:55:27 +00:00
|
|
|
|
|
|
|
sx_xlock(&md_sx);
|
|
|
|
error = xmdctlioctl(dev, cmd, addr, flags, td);
|
|
|
|
sx_xunlock(&md_sx);
|
|
|
|
return (error);
|
2000-08-20 21:34:39 +00:00
|
|
|
}
|
|
|
|
|
2001-01-02 09:42:47 +00:00
|
|
|
static void
|
2012-11-21 17:05:57 +00:00
|
|
|
md_preloaded(u_char *image, size_t length, const char *name)
|
2001-01-02 09:42:47 +00:00
|
|
|
{
|
|
|
|
struct md_s *sc;
|
2005-09-19 06:55:27 +00:00
|
|
|
int error;
|
2001-01-02 09:42:47 +00:00
|
|
|
|
2005-10-06 19:47:04 +00:00
|
|
|
sc = mdnew(-1, &error, MD_PRELOAD);
|
2001-01-02 09:42:47 +00:00
|
|
|
if (sc == NULL)
|
|
|
|
return;
|
2004-09-16 21:32:13 +00:00
|
|
|
sc->mediasize = length;
|
|
|
|
sc->sectorsize = DEV_BSIZE;
|
2001-01-02 09:42:47 +00:00
|
|
|
sc->pl_ptr = image;
|
|
|
|
sc->pl_len = length;
|
2005-09-19 06:55:27 +00:00
|
|
|
sc->start = mdstart_preload;
|
2004-03-31 21:48:02 +00:00
|
|
|
#ifdef MD_ROOT
|
2002-01-21 20:57:03 +00:00
|
|
|
if (sc->unit == 0)
|
2012-11-03 21:20:55 +00:00
|
|
|
rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0";
|
2004-03-31 21:48:02 +00:00
|
|
|
#endif
|
2001-01-02 09:42:47 +00:00
|
|
|
mdinit(sc);
|
2012-11-21 17:05:57 +00:00
|
|
|
if (name != NULL) {
|
|
|
|
printf("%s%d: Preloaded image <%s> %zd bytes at %p\n",
|
|
|
|
MD_NAME, sc->unit, name, length, image);
|
|
|
|
}
|
2001-01-02 09:42:47 +00:00
|
|
|
}
|
|
|
|
|
1999-09-21 11:00:49 +00:00
|
|
|
static void
|
2004-08-08 06:47:43 +00:00
|
|
|
g_md_init(struct g_class *mp __unused)
|
1999-09-21 11:00:49 +00:00
|
|
|
{
|
1999-11-20 22:43:27 +00:00
|
|
|
caddr_t mod;
|
|
|
|
u_char *ptr, *name, *type;
|
|
|
|
unsigned len;
|
2010-07-06 18:22:57 +00:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/* figure out log2(NINDIR) */
|
|
|
|
for (i = NINDIR, nshift = -1; i; nshift++)
|
|
|
|
i >>= 1;
|
1999-11-20 22:43:27 +00:00
|
|
|
|
2003-12-13 18:12:58 +00:00
|
|
|
mod = NULL;
|
2005-09-19 06:55:27 +00:00
|
|
|
sx_init(&md_sx, "MD config lock");
|
2003-12-13 18:12:58 +00:00
|
|
|
g_topology_unlock();
|
2010-07-22 10:24:28 +00:00
|
|
|
md_uh = new_unrhdr(0, INT_MAX, NULL);
|
1999-11-26 20:08:44 +00:00
|
|
|
#ifdef MD_ROOT_SIZE
|
2005-09-19 06:55:27 +00:00
|
|
|
sx_xlock(&md_sx);
|
2012-11-21 17:05:57 +00:00
|
|
|
md_preloaded(mfs_root.start, sizeof(mfs_root.start), NULL);
|
2005-09-19 06:55:27 +00:00
|
|
|
sx_xunlock(&md_sx);
|
1999-11-26 20:08:44 +00:00
|
|
|
#endif
|
2005-09-19 06:55:27 +00:00
|
|
|
/* XXX: are preload_* static or do they need Giant ? */
|
1999-11-20 22:43:27 +00:00
|
|
|
while ((mod = preload_search_next_name(mod)) != NULL) {
|
|
|
|
name = (char *)preload_search_info(mod, MODINFO_NAME);
|
|
|
|
if (name == NULL)
|
|
|
|
continue;
|
2005-09-19 06:55:27 +00:00
|
|
|
type = (char *)preload_search_info(mod, MODINFO_TYPE);
|
1999-11-20 22:43:27 +00:00
|
|
|
if (type == NULL)
|
|
|
|
continue;
|
1999-11-26 20:08:44 +00:00
|
|
|
if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
|
1999-11-20 22:43:27 +00:00
|
|
|
continue;
|
2011-02-09 19:31:10 +00:00
|
|
|
ptr = preload_fetch_addr(mod);
|
|
|
|
len = preload_fetch_size(mod);
|
|
|
|
if (ptr != NULL && len != 0) {
|
|
|
|
sx_xlock(&md_sx);
|
2012-11-21 17:05:57 +00:00
|
|
|
md_preloaded(ptr, len, name);
|
2011-02-09 19:31:10 +00:00
|
|
|
sx_xunlock(&md_sx);
|
|
|
|
}
|
2002-01-21 20:57:03 +00:00
|
|
|
}
|
2013-03-19 14:53:23 +00:00
|
|
|
md_vnode_pbuf_freecnt = nswbuf / 10;
|
2008-05-29 12:50:46 +00:00
|
|
|
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
|
2001-07-18 13:32:38 +00:00
|
|
|
0600, MDCTL_NAME);
|
2003-11-18 18:19:26 +00:00
|
|
|
g_topology_lock();
|
2001-02-24 16:26:41 +00:00
|
|
|
}
|
|
|
|
|
2006-03-26 23:21:11 +00:00
|
|
|
static void
|
2010-07-26 10:37:14 +00:00
|
|
|
g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
|
2006-03-26 23:21:11 +00:00
|
|
|
struct g_consumer *cp __unused, struct g_provider *pp)
|
|
|
|
{
|
|
|
|
struct md_s *mp;
|
|
|
|
char *type;
|
|
|
|
|
|
|
|
mp = gp->softc;
|
|
|
|
if (mp == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
switch (mp->type) {
|
|
|
|
case MD_MALLOC:
|
|
|
|
type = "malloc";
|
|
|
|
break;
|
|
|
|
case MD_PRELOAD:
|
|
|
|
type = "preload";
|
|
|
|
break;
|
|
|
|
case MD_VNODE:
|
|
|
|
type = "vnode";
|
|
|
|
break;
|
|
|
|
case MD_SWAP:
|
|
|
|
type = "swap";
|
|
|
|
break;
|
2013-12-04 07:38:23 +00:00
|
|
|
case MD_NULL:
|
|
|
|
type = "null";
|
|
|
|
break;
|
2006-03-26 23:21:11 +00:00
|
|
|
default:
|
|
|
|
type = "unknown";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pp != NULL) {
|
|
|
|
if (indent == NULL) {
|
|
|
|
sbuf_printf(sb, " u %d", mp->unit);
|
|
|
|
sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize);
|
|
|
|
sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads);
|
|
|
|
sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors);
|
|
|
|
sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize);
|
|
|
|
sbuf_printf(sb, " t %s", type);
|
|
|
|
if (mp->type == MD_VNODE && mp->vnode != NULL)
|
|
|
|
sbuf_printf(sb, " file %s", mp->file);
|
|
|
|
} else {
|
|
|
|
sbuf_printf(sb, "%s<unit>%d</unit>\n", indent,
|
|
|
|
mp->unit);
|
|
|
|
sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n",
|
|
|
|
indent, (uintmax_t) mp->sectorsize);
|
|
|
|
sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n",
|
|
|
|
indent, (uintmax_t) mp->fwheads);
|
|
|
|
sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n",
|
|
|
|
indent, (uintmax_t) mp->fwsectors);
|
|
|
|
sbuf_printf(sb, "%s<length>%ju</length>\n",
|
|
|
|
indent, (uintmax_t) mp->mediasize);
|
2011-10-31 10:53:27 +00:00
|
|
|
sbuf_printf(sb, "%s<compression>%s</compression>\n", indent,
|
|
|
|
(mp->flags & MD_COMPRESS) == 0 ? "off": "on");
|
|
|
|
sbuf_printf(sb, "%s<access>%s</access>\n", indent,
|
|
|
|
(mp->flags & MD_READONLY) == 0 ? "read-write":
|
|
|
|
"read-only");
|
2006-03-26 23:21:11 +00:00
|
|
|
sbuf_printf(sb, "%s<type>%s</type>\n", indent,
|
|
|
|
type);
|
2015-08-13 13:20:29 +00:00
|
|
|
if (mp->type == MD_VNODE && mp->vnode != NULL) {
|
|
|
|
sbuf_printf(sb, "%s<file>", indent);
|
|
|
|
g_conf_printf_escaped(sb, "%s", mp->file);
|
|
|
|
sbuf_printf(sb, "</file>\n");
|
|
|
|
}
|
2006-03-26 23:21:11 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-08 06:47:43 +00:00
|
|
|
static void
|
|
|
|
g_md_fini(struct g_class *mp __unused)
|
2001-02-24 16:26:41 +00:00
|
|
|
{
|
2001-08-27 13:25:47 +00:00
|
|
|
|
2005-09-19 06:55:27 +00:00
|
|
|
sx_destroy(&md_sx);
|
2004-08-08 06:47:43 +00:00
|
|
|
if (status_dev != NULL)
|
|
|
|
destroy_dev(status_dev);
|
2010-07-22 10:24:28 +00:00
|
|
|
delete_unrhdr(md_uh);
|
1999-09-21 11:00:49 +00:00
|
|
|
}
|