3289 lines
85 KiB
C
3289 lines
85 KiB
C
/*-
|
|
* Copyright (c) 2002 Scott Long <scottl@freebsd.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
/* $NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $ */
|
|
/*-
|
|
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
* by Greg Oster; Jason R. Thorpe.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the NetBSD
|
|
* Foundation, Inc. and its contributors.
|
|
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 1988 University of Utah.
|
|
* Copyright (c) 1990, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* the Systems Programming Group of the University of Utah Computer
|
|
* Science Department.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* from: Utah $Hdr: cd.c 1.6 90/11/28$
|
|
*
|
|
* @(#)cd.c 8.2 (Berkeley) 11/16/93
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
* Copyright (c) 1995 Carnegie-Mellon University.
|
|
* All rights reserved.
|
|
*
|
|
* Authors: Mark Holland, Jim Zelenka
|
|
*
|
|
* Permission to use, copy, modify and distribute this software and
|
|
* its documentation is hereby granted, provided that both the copyright
|
|
* notice and this permission notice appear in all copies of the
|
|
* software, derivative works or modified versions, and any portions
|
|
* thereof, and that both notices appear in supporting documentation.
|
|
*
|
|
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
|
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
|
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
|
*
|
|
* Carnegie Mellon requests users of this software to return to
|
|
*
|
|
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
|
* School of Computer Science
|
|
* Carnegie Mellon University
|
|
* Pittsburgh PA 15213-3890
|
|
*
|
|
* any improvements or extensions that they make and grant Carnegie the
|
|
* rights to redistribute these changes.
|
|
*/
|
|
|
|
/***********************************************************
|
|
*
|
|
* rf_kintf.c -- the kernel interface routines for RAIDframe
|
|
*
|
|
***********************************************************/
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/param.h>
|
|
#include <sys/errno.h>
|
|
#include <sys/param.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/ioccom.h>
|
|
#include <sys/filio.h>
|
|
#include <sys/filedesc.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/namei.h>
|
|
#include <sys/vnode.h>
|
|
#include <sys/bio.h>
|
|
#include <sys/buf.h>
|
|
#include <sys/disk.h>
|
|
#include <sys/diskslice.h>
|
|
#include <sys/disklabel.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/reboot.h>
|
|
#include <sys/module.h>
|
|
#include <sys/devicestat.h>
|
|
#include <vm/uma.h>
|
|
|
|
#include "opt_raid.h"
|
|
#include <dev/raidframe/rf_raid.h>
|
|
#include <dev/raidframe/rf_raidframe.h>
|
|
#include <dev/raidframe/rf_copyback.h>
|
|
#include <dev/raidframe/rf_dag.h>
|
|
#include <dev/raidframe/rf_dagflags.h>
|
|
#include <dev/raidframe/rf_desc.h>
|
|
#include <dev/raidframe/rf_diskqueue.h>
|
|
#include <dev/raidframe/rf_acctrace.h>
|
|
#include <dev/raidframe/rf_etimer.h>
|
|
#include <dev/raidframe/rf_general.h>
|
|
#include <dev/raidframe/rf_debugMem.h>
|
|
#include <dev/raidframe/rf_kintf.h>
|
|
#include <dev/raidframe/rf_options.h>
|
|
#include <dev/raidframe/rf_driver.h>
|
|
#include <dev/raidframe/rf_parityscan.h>
|
|
#include <dev/raidframe/rf_debugprint.h>
|
|
#include <dev/raidframe/rf_threadstuff.h>
|
|
#include <dev/raidframe/rf_configure.h>
|
|
|
|
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
|
|
|
|
static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
|
|
* spare table */
|
|
static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
|
|
* installation process */
|
|
|
|
/* prototypes */
|
|
static void KernelWakeupFunc(struct bio *);
|
|
static void InitBP(struct bio *, struct vnode *, unsigned rw_flag,
|
|
dev_t dev, RF_SectorNum_t startSect,
|
|
RF_SectorCount_t numSect, caddr_t buf,
|
|
void (*cbFunc) (struct bio *), void *cbArg,
|
|
int logBytesPerSector, struct proc * b_proc);
|
|
static dev_t raidinit(RF_Raid_t *);
|
|
static void rf_search_label(dev_t, struct disklabel *,
|
|
RF_AutoConfig_t **) __unused;
|
|
|
|
static int raid_modevent(module_t, int, void*);
|
|
void raidattach(void);
|
|
d_psize_t raidsize;
|
|
d_open_t raidopen;
|
|
d_close_t raidclose;
|
|
d_ioctl_t raidioctl;
|
|
d_write_t raidwrite;
|
|
d_read_t raidread;
|
|
d_strategy_t raidstrategy;
|
|
#if 0
|
|
d_dump_t raiddump;
|
|
#endif
|
|
|
|
d_open_t raidctlopen;
|
|
d_close_t raidctlclose;
|
|
d_ioctl_t raidctlioctl;
|
|
|
|
static struct cdevsw raid_cdevsw = {
|
|
raidopen,
|
|
raidclose,
|
|
raidread,
|
|
raidwrite,
|
|
raidioctl,
|
|
nopoll,
|
|
nommap,
|
|
raidstrategy,
|
|
"raid",
|
|
200,
|
|
nodump,
|
|
nopsize,
|
|
D_DISK,
|
|
};
|
|
|
|
static struct cdevsw raidctl_cdevsw = {
|
|
raidctlopen,
|
|
raidctlclose,
|
|
noread,
|
|
nowrite,
|
|
raidctlioctl,
|
|
nopoll,
|
|
nommap,
|
|
nostrategy,
|
|
"raidctl",
|
|
201,
|
|
nodump,
|
|
nopsize,
|
|
0,
|
|
};
|
|
|
|
static struct cdevsw raiddisk_cdevsw;
|
|
|
|
/*
|
|
* Pilfered from ccd.c
|
|
*/
|
|
|
|
struct raidbuf {
|
|
struct bio rf_buf; /* new I/O buf. MUST BE FIRST!!! */
|
|
struct bio *rf_obp; /* ptr. to original I/O buf */
|
|
int rf_flags; /* misc. flags */
|
|
RF_DiskQueueData_t *req;/* the request that this was part of.. */
|
|
};
|
|
|
|
|
|
#define RAIDGETBUF(sc) uma_zalloc((sc)->sc_cbufpool, M_NOWAIT)
|
|
#define RAIDPUTBUF(sc, cbp) uma_zfree((sc)->sc_cbufpool, cbp)
|
|
|
|
#define RF_MAX_ARRAYS 32
|
|
|
|
/* Raid control device */
|
|
struct raidctl_softc {
|
|
dev_t sc_dev; /* Device node */
|
|
int sc_flags; /* flags */
|
|
int sc_numraid; /* Number of configured raid devices */
|
|
dev_t sc_raiddevs[RF_MAX_ARRAYS];
|
|
};
|
|
|
|
struct raid_softc {
|
|
dev_t sc_dev; /* Our device */
|
|
dev_t sc_parent_dev;
|
|
int sc_flags; /* flags */
|
|
int sc_busycount; /* How many times are we opened? */
|
|
size_t sc_size; /* size of the raid device */
|
|
dev_t sc_parent; /* Parent device */
|
|
struct disk sc_dkdev; /* generic disk device info */
|
|
uma_zone_t sc_cbufpool; /* component buffer pool */
|
|
RF_Raid_t *raidPtr; /* Raid information struct */
|
|
struct bio_queue_head bio_queue; /* used for the device queue */
|
|
struct devstat device_stats; /* devstat gathering */
|
|
};
|
|
/* sc_flags */
|
|
#define RAIDF_OPEN 0x01 /* unit has been initialized */
|
|
#define RAIDF_WLABEL 0x02 /* label area is writable */
|
|
#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
|
|
#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
|
|
#define RAIDF_LOCKED 0x80 /* unit is locked */
|
|
|
|
/*
|
|
* Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
|
|
* Be aware that large numbers can allow the driver to consume a lot of
|
|
* kernel memory, especially on writes, and in degraded mode reads.
|
|
*
|
|
* For example: with a stripe width of 64 blocks (32k) and 5 disks,
|
|
* a single 64K write will typically require 64K for the old data,
|
|
* 64K for the old parity, and 64K for the new parity, for a total
|
|
* of 192K (if the parity buffer is not re-used immediately).
|
|
* Even it if is used immedately, that's still 128K, which when multiplied
|
|
* by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
|
|
*
|
|
* Now in degraded mode, for example, a 64K read on the above setup may
|
|
* require data reconstruction, which will require *all* of the 4 remaining
|
|
* disks to participate -- 4 * 32K/disk == 128K again.
|
|
*/
|
|
|
|
#ifndef RAIDOUTSTANDING
|
|
#define RAIDOUTSTANDING 10
|
|
#endif
|
|
|
|
#define RAIDLABELDEV(dev) dkmodpart(dev, RAW_PART)
|
|
#define DISKPART(dev) dkpart(dev)
|
|
|
|
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disk*);
|
|
static int raidlock(struct raid_softc *);
|
|
static void raidunlock(struct raid_softc *);
|
|
|
|
static void rf_markalldirty(RF_Raid_t *);
|
|
|
|
static dev_t raidctl_dev;
|
|
|
|
void rf_ReconThread(struct rf_recon_req *);
|
|
/* XXX what I want is: */
|
|
/*void rf_ReconThread(RF_Raid_t *raidPtr); */
|
|
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
|
|
void rf_CopybackThread(RF_Raid_t *raidPtr);
|
|
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
|
|
void rf_buildroothack(void *, struct raidctl_softc *);
|
|
|
|
RF_AutoConfig_t *rf_find_raid_components(void);
|
|
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
|
|
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
|
|
static int rf_reasonable_label(RF_ComponentLabel_t *);
|
|
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
|
|
int rf_set_autoconfig(RF_Raid_t *, int);
|
|
int rf_set_rootpartition(RF_Raid_t *, int);
|
|
void rf_release_all_vps(RF_ConfigSet_t *);
|
|
void rf_cleanup_config_set(RF_ConfigSet_t *);
|
|
int rf_have_enough_components(RF_ConfigSet_t *);
|
|
int rf_auto_config_set(RF_ConfigSet_t *, int *, struct raidctl_softc *);
|
|
static int raidgetunit(struct raidctl_softc *, int);
|
|
static int raidshutdown(void);
|
|
|
|
void
|
|
raidattach(void)
|
|
{
|
|
struct raidctl_softc *parent_sc = NULL;
|
|
RF_AutoConfig_t *ac_list; /* autoconfig list */
|
|
RF_ConfigSet_t *config_sets;
|
|
int autoconfig = 0;
|
|
|
|
/* This is where all the initialization stuff gets done. */
|
|
|
|
if(rf_mutex_init(&rf_sparet_wait_mutex, __FUNCTION__)) {
|
|
rf_printf(0, "RAIDframe: failed to initialize mutexes\n");
|
|
return;
|
|
}
|
|
|
|
rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
|
|
|
|
if (rf_BootRaidframe() != 0) {
|
|
rf_printf(0, "Serious error booting RAIDframe!!\n");
|
|
return;
|
|
}
|
|
|
|
rf_printf(0, "Kernelized RAIDframe activated\n");
|
|
MALLOC(parent_sc, struct raidctl_softc *, sizeof(*parent_sc),
|
|
M_RAIDFRAME, M_NOWAIT|M_ZERO);
|
|
if (parent_sc == NULL) {
|
|
RF_PANIC();
|
|
return;
|
|
}
|
|
|
|
parent_sc->sc_dev= make_dev(&raidctl_cdevsw, 0, UID_ROOT, GID_WHEEL,
|
|
0600, "raidctl");
|
|
parent_sc->sc_dev->si_drv1 = parent_sc;
|
|
raidctl_dev = parent_sc->sc_dev;
|
|
|
|
#if RAID_AUTOCONFIG
|
|
autoconfig = 1;
|
|
#endif
|
|
|
|
if (autoconfig) {
|
|
/* 1. locate all RAID components on the system */
|
|
|
|
rf_printf(0, "Searching for raid components...\n");
|
|
ac_list = rf_find_raid_components();
|
|
if (ac_list == NULL)
|
|
return;
|
|
|
|
/* 2. sort them into their respective sets */
|
|
|
|
config_sets = rf_create_auto_sets(ac_list);
|
|
|
|
/* 3. evaluate each set and configure the valid ones
|
|
This gets done in rf_buildroothack() */
|
|
|
|
/* schedule the creation of the thread to do the
|
|
"/ on RAID" stuff */
|
|
|
|
rf_buildroothack(config_sets, parent_sc);
|
|
#if 0
|
|
kthread_create(rf_buildroothack,config_sets);
|
|
|
|
#endif /* RAID_AUTOCONFIG */
|
|
}
|
|
}
|
|
|
|
void
|
|
rf_buildroothack(arg, parent_sc)
|
|
void *arg;
|
|
struct raidctl_softc *parent_sc;
|
|
{
|
|
RF_ConfigSet_t *config_sets = arg;
|
|
RF_ConfigSet_t *cset;
|
|
RF_ConfigSet_t *next_cset;
|
|
int retcode;
|
|
int raidID;
|
|
int rootID;
|
|
int num_root;
|
|
|
|
rootID = 0;
|
|
num_root = 0;
|
|
cset = config_sets;
|
|
while(cset != NULL ) {
|
|
next_cset = cset->next;
|
|
if (rf_have_enough_components(cset) &&
|
|
cset->ac->clabel->autoconfigure==1) {
|
|
retcode = rf_auto_config_set(cset, &raidID, parent_sc);
|
|
if (!retcode) {
|
|
if (cset->rootable) {
|
|
rootID = raidID;
|
|
num_root++;
|
|
}
|
|
} else {
|
|
/* The autoconfig didn't work :( */
|
|
rf_printf(1, "Autoconfig failed with code %d"
|
|
"for raid%d\n", retcode, raidID);
|
|
rf_release_all_vps(cset);
|
|
}
|
|
} else {
|
|
/* we're not autoconfiguring this set...
|
|
release the associated resources */
|
|
rf_release_all_vps(cset);
|
|
}
|
|
/* cleanup */
|
|
rf_cleanup_config_set(cset);
|
|
cset = next_cset;
|
|
}
|
|
if (boothowto & RB_ASKNAME) {
|
|
/* We don't auto-config... */
|
|
} else {
|
|
/* They didn't ask, and we found something bootable... */
|
|
|
|
#if 0
|
|
if (num_root == 1) {
|
|
booted_device = &raidrootdev[rootID];
|
|
} else if (num_root > 1) {
|
|
/* we can't guess.. require the user to answer... */
|
|
boothowto |= RB_ASKNAME;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
int
|
|
raidctlopen(dev_t dev, int flags, int fmt, struct thread *td)
|
|
{
|
|
struct raidctl_softc *parent_sc;
|
|
|
|
parent_sc = dev->si_drv1;
|
|
|
|
if ((parent_sc->sc_flags & RAIDF_OPEN) != 0)
|
|
return (EBUSY);
|
|
|
|
parent_sc->sc_flags |= RAIDF_OPEN;
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
raidctlclose(dev_t dev, int flags, int fmt, struct thread *td)
|
|
{
|
|
struct raidctl_softc *parent_sc;
|
|
|
|
parent_sc = dev->si_drv1;
|
|
|
|
parent_sc->sc_flags &= ~RAIDF_OPEN;
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
raidctlioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
|
|
{
|
|
struct raidctl_softc *parent_sc;
|
|
struct raid_softc *sc;
|
|
RF_Config_t *u_cfg, *k_cfg;
|
|
RF_Raid_t *raidPtr;
|
|
u_char *specific_buf;
|
|
u_int unit;
|
|
int retcode = 0;
|
|
|
|
parent_sc = dev->si_drv1;
|
|
|
|
switch (cmd) {
|
|
/* configure the system */
|
|
case RAIDFRAME_CONFIGURE:
|
|
|
|
/* copy-in the configuration information */
|
|
/* data points to a pointer to the configuration structure */
|
|
|
|
u_cfg = *((RF_Config_t **) data);
|
|
RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
|
|
if (k_cfg == NULL) {
|
|
return (ENOMEM);
|
|
}
|
|
retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
|
|
sizeof(RF_Config_t));
|
|
if (retcode) {
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
rf_printf(2, "raidctlioctl: retcode=%d copyin.1\n",
|
|
retcode);
|
|
return (retcode);
|
|
}
|
|
/* allocate a buffer for the layout-specific data, and copy it
|
|
* in */
|
|
if (k_cfg->layoutSpecificSize) {
|
|
if (k_cfg->layoutSpecificSize > 10000) {
|
|
/* sanity check */
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
return (EINVAL);
|
|
}
|
|
RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
|
|
(u_char *));
|
|
if (specific_buf == NULL) {
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
return (ENOMEM);
|
|
}
|
|
retcode = copyin(k_cfg->layoutSpecific,
|
|
(caddr_t) specific_buf,
|
|
k_cfg->layoutSpecificSize);
|
|
if (retcode) {
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
RF_Free(specific_buf,
|
|
k_cfg->layoutSpecificSize);
|
|
rf_printf(2, "raidctlioctl: retcode=%d "
|
|
"copyin.2\n", retcode);
|
|
return (retcode);
|
|
}
|
|
} else
|
|
specific_buf = NULL;
|
|
k_cfg->layoutSpecific = specific_buf;
|
|
|
|
/* should do some kind of sanity check on the configuration.
|
|
* Store the sum of all the bytes in the last byte? */
|
|
|
|
/* configure the system */
|
|
|
|
RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *));
|
|
if (raidPtr == NULL) {
|
|
rf_printf(0, "No memory for raid device\n");
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
retcode = ENOMEM;
|
|
}
|
|
bzero((char *) raidPtr, sizeof(RF_Raid_t));
|
|
|
|
/* Request a unit number for this soon-to-be device. */
|
|
unit = raidgetunit(parent_sc, 0);
|
|
if (unit == -1) {
|
|
rf_printf(0, "Cannot allocate raid unit\n");
|
|
RF_Free(raidPtr, sizeof(*raidPtr));
|
|
goto out;
|
|
}
|
|
raidPtr->raidid = unit;
|
|
|
|
if ((retcode = rf_Configure(raidPtr, k_cfg, NULL)) == 0) {
|
|
|
|
/* allow this many simultaneous IO's to
|
|
this RAID device */
|
|
raidPtr->openings = RAIDOUTSTANDING;
|
|
|
|
parent_sc->sc_raiddevs[unit] = raidinit(raidPtr);
|
|
if (parent_sc->sc_raiddevs[unit] == NULL) {
|
|
rf_printf(0, "Could not create raid device\n");
|
|
RF_Free(raidPtr, sizeof(*raidPtr));
|
|
goto out;
|
|
}
|
|
parent_sc->sc_numraid++;
|
|
((struct raid_softc *)raidPtr->sc)->sc_parent_dev = dev;
|
|
rf_markalldirty(raidPtr);
|
|
} else {
|
|
parent_sc->sc_raiddevs[unit] = NULL;
|
|
RF_Free(raidPtr, sizeof(*raidPtr));
|
|
}
|
|
|
|
out:
|
|
/* free the buffers. No return code here. */
|
|
if (k_cfg->layoutSpecificSize) {
|
|
RF_Free(specific_buf, k_cfg->layoutSpecificSize);
|
|
}
|
|
RF_Free(k_cfg, sizeof(RF_Config_t));
|
|
break;
|
|
|
|
case RAIDFRAME_SHUTDOWN:
|
|
|
|
unit = *(u_int *)data;
|
|
if ((unit >= RF_MAX_ARRAYS) ||
|
|
(parent_sc->sc_raiddevs[unit] == NULL))
|
|
return (EINVAL);
|
|
|
|
sc = parent_sc->sc_raiddevs[unit]->si_drv1;
|
|
if ((retcode = raidlock(sc)) != 0)
|
|
return (retcode);
|
|
|
|
/*
|
|
* If somebody has a partition mounted, we shouldn't
|
|
* shutdown.
|
|
*/
|
|
|
|
if ((sc->sc_flags & RAIDF_OPEN) != 0) {
|
|
raidunlock(sc);
|
|
return (EBUSY);
|
|
}
|
|
|
|
rf_printf(0, "Shutting down RAIDframe engine\n");
|
|
retcode = rf_Shutdown(sc->raidPtr);
|
|
RF_THREADGROUP_WAIT_STOP(&sc->raidPtr->engine_tg);
|
|
|
|
devstat_remove_entry(&sc->device_stats);
|
|
|
|
disk_destroy(parent_sc->sc_raiddevs[unit]);
|
|
raidunlock(sc);
|
|
|
|
/* XXX Need to be able to destroy the zone */
|
|
uma_zdestroy(sc->sc_cbufpool);
|
|
|
|
parent_sc->sc_numraid--;
|
|
parent_sc->sc_raiddevs[unit] = NULL;
|
|
|
|
RF_Free(sc->raidPtr, sizeof(*raidPtr));
|
|
RF_Free(sc, sizeof(*sc));
|
|
|
|
break;
|
|
|
|
default:
|
|
retcode = ENOIOCTL;
|
|
}
|
|
|
|
return (retcode);
|
|
}
|
|
|
|
#if 0 /* XXX DUMP!!!! */
|
|
int
|
|
raiddump(dev)
|
|
dev_t dev;
|
|
{
|
|
/* Not implemented. */
|
|
return ENXIO;
|
|
}
|
|
#endif
|
|
|
|
/* ARGSUSED */
|
|
int
|
|
raidopen(dev, flags, fmt, td)
|
|
dev_t dev;
|
|
int flags, fmt;
|
|
struct thread *td;
|
|
{
|
|
struct raid_softc *sc;
|
|
struct disk *dp;
|
|
int error = 0;
|
|
|
|
sc = dev->si_drv1;
|
|
|
|
if ((error = raidlock(sc)) != 0)
|
|
return (error);
|
|
dp = &sc->sc_dkdev;
|
|
|
|
rf_printf(1, "Opening raid device %s\n", dev->si_name);
|
|
|
|
/* Generate overall disklabel */
|
|
raidgetdefaultlabel(sc->raidPtr, sc, dp);
|
|
|
|
if (sc->sc_busycount == 0) {
|
|
/* First one... mark things as dirty... Note that we *MUST*
|
|
have done a configure before this. I DO NOT WANT TO BE
|
|
SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
|
|
THAT THEY BELONG TOGETHER!!!!! */
|
|
/* XXX should check to see if we're only open for reading
|
|
here... If so, we needn't do this, but then need some
|
|
other way of keeping track of what's happened.. */
|
|
|
|
rf_markalldirty( sc->raidPtr );
|
|
sc->sc_flags |= RAIDF_OPEN;
|
|
}
|
|
|
|
/* Prevent this unit from being unconfigured while open. */
|
|
sc->sc_busycount++;
|
|
|
|
raidunlock(sc);
|
|
|
|
return (error);
|
|
|
|
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
raidclose(dev, flags, fmt, td)
|
|
dev_t dev;
|
|
int flags, fmt;
|
|
struct thread *td;
|
|
{
|
|
struct raid_softc *sc;
|
|
int error = 0;
|
|
|
|
sc = dev->si_drv1;
|
|
|
|
if ((error = raidlock(sc)) != 0)
|
|
return (error);
|
|
|
|
sc->sc_busycount--;
|
|
if (sc->sc_busycount == 0) {
|
|
sc->sc_flags &= ~RAIDF_OPEN;
|
|
rf_update_component_labels(sc->raidPtr,
|
|
RF_FINAL_COMPONENT_UPDATE);
|
|
}
|
|
|
|
raidunlock(sc);
|
|
return (0);
|
|
|
|
}
|
|
|
|
void
|
|
raidstrategy(bp)
|
|
struct bio *bp;
|
|
{
|
|
RF_Raid_t *raidPtr;
|
|
struct raid_softc *sc = bp->bio_dev->si_drv1;
|
|
int s;
|
|
|
|
raidPtr = sc->raidPtr;
|
|
if (raidPtr == NULL) {
|
|
bp->bio_error = ENODEV;
|
|
bp->bio_flags |= BIO_ERROR;
|
|
bp->bio_resid = bp->bio_bcount;
|
|
biodone(bp);
|
|
return;
|
|
}
|
|
if (!raidPtr->valid) {
|
|
bp->bio_error = ENODEV;
|
|
bp->bio_flags |= BIO_ERROR;
|
|
bp->bio_resid = bp->bio_bcount;
|
|
biodone(bp);
|
|
return;
|
|
}
|
|
if (bp->bio_bcount == 0) {
|
|
rf_printf(2, "b_bcount is zero..\n");
|
|
biodone(bp);
|
|
return;
|
|
}
|
|
|
|
s = splbio();
|
|
|
|
bp->bio_resid = 0;
|
|
|
|
/* stuff it onto our queue. XXX locking? */
|
|
bioq_insert_tail(&sc->bio_queue, bp);
|
|
|
|
raidstart(raidPtr);
|
|
|
|
splx(s);
|
|
}
|
|
|
|
int
|
|
raidread(dev, uio, flags)
|
|
dev_t dev;
|
|
struct uio *uio;
|
|
int flags;
|
|
{
|
|
struct raid_softc *sc;
|
|
|
|
sc = dev->si_drv1;
|
|
|
|
return (physio(dev, uio, BIO_READ));
|
|
|
|
}
|
|
|
|
int
|
|
raidwrite(dev, uio, flags)
|
|
dev_t dev;
|
|
struct uio *uio;
|
|
int flags;
|
|
{
|
|
struct raid_softc *sc;
|
|
int ret;
|
|
|
|
sc = dev->si_drv1;
|
|
|
|
rf_printf(3, "raidwrite\n");
|
|
ret = physio(dev, uio, BIO_WRITE);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
int
|
|
raidioctl(dev, cmd, data, flag, td)
|
|
dev_t dev;
|
|
u_long cmd;
|
|
caddr_t data;
|
|
int flag;
|
|
struct thread *td;
|
|
{
|
|
struct raid_softc *sc;
|
|
RF_Raid_t *raidPtr;
|
|
RF_RaidDisk_t *diskPtr;
|
|
RF_AccTotals_t *totals;
|
|
RF_DeviceConfig_t *d_cfg, **ucfgp;
|
|
struct rf_recon_req *rrcopy, *rr;
|
|
RF_ComponentLabel_t *clabel;
|
|
RF_ComponentLabel_t *ci_label;
|
|
RF_SingleComponent_t *sparePtr,*componentPtr;
|
|
RF_SingleComponent_t *hot_spare, *component;
|
|
RF_ProgressInfo_t progressInfo;
|
|
int retcode = 0;
|
|
int row, column;
|
|
int unit;
|
|
int i, j, d;
|
|
|
|
sc = dev->si_drv1;
|
|
raidPtr = sc->raidPtr;
|
|
|
|
rf_printf(2, "raidioctl: %s %ld\n", dev->si_name, cmd);
|
|
|
|
switch (cmd) {
|
|
|
|
case RAIDFRAME_GET_COMPONENT_LABEL:
|
|
/* need to read the component label for the disk indicated
|
|
by row,column in clabel */
|
|
|
|
/* For practice, let's get it directly fromdisk, rather
|
|
than from the in-core copy */
|
|
RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
|
|
(RF_ComponentLabel_t *));
|
|
if (clabel == NULL)
|
|
return (ENOMEM);
|
|
|
|
bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
|
|
|
|
bcopy(data, clabel, sizeof(RF_ComponentLabel_t));
|
|
|
|
row = clabel->row;
|
|
column = clabel->column;
|
|
|
|
if ((row < 0) || (row >= raidPtr->numRow) ||
|
|
(column < 0) || (column >= raidPtr->numCol +
|
|
raidPtr->numSpare)) {
|
|
RF_Free( clabel, sizeof(RF_ComponentLabel_t));
|
|
return(EINVAL);
|
|
}
|
|
|
|
raidread_component_label(raidPtr->Disks[row][column].dev,
|
|
raidPtr->raid_cinfo[row][column].ci_vp,
|
|
clabel );
|
|
|
|
bcopy(clabel, data, sizeof(RF_ComponentLabel_t));
|
|
RF_Free( clabel, sizeof(RF_ComponentLabel_t));
|
|
return (retcode);
|
|
|
|
case RAIDFRAME_SET_COMPONENT_LABEL:
|
|
clabel = (RF_ComponentLabel_t *) data;
|
|
|
|
/* XXX check the label for valid stuff... */
|
|
/* Note that some things *should not* get modified --
|
|
the user should be re-initing the labels instead of
|
|
trying to patch things.
|
|
*/
|
|
|
|
rf_printf(1, "Got component label:\n");
|
|
rf_printf(1, "Version: %d\n",clabel->version);
|
|
rf_printf(1, "Serial Number: %d\n",clabel->serial_number);
|
|
rf_printf(1, "Mod counter: %d\n",clabel->mod_counter);
|
|
rf_printf(1, "Row: %d\n", clabel->row);
|
|
rf_printf(1, "Column: %d\n", clabel->column);
|
|
rf_printf(1, "Num Rows: %d\n", clabel->num_rows);
|
|
rf_printf(1, "Num Columns: %d\n", clabel->num_columns);
|
|
rf_printf(1, "Clean: %d\n", clabel->clean);
|
|
rf_printf(1, "Status: %d\n", clabel->status);
|
|
|
|
row = clabel->row;
|
|
column = clabel->column;
|
|
|
|
if ((row < 0) || (row >= raidPtr->numRow) ||
|
|
(column < 0) || (column >= raidPtr->numCol)) {
|
|
return(EINVAL);
|
|
}
|
|
|
|
/* XXX this isn't allowed to do anything for now :-) */
|
|
|
|
/* XXX and before it is, we need to fill in the rest
|
|
of the fields!?!?!?! */
|
|
#if 0
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[row][column].dev,
|
|
raidPtr->raid_cinfo[row][column].ci_vp,
|
|
clabel );
|
|
#endif
|
|
return (0);
|
|
|
|
case RAIDFRAME_INIT_LABELS:
|
|
MALLOC(ci_label, RF_ComponentLabel_t *,
|
|
sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
|
|
M_WAITOK | M_ZERO);
|
|
clabel = (RF_ComponentLabel_t *) data;
|
|
/*
|
|
we only want the serial number from
|
|
the above. We get all the rest of the information
|
|
from the config that was used to create this RAID
|
|
set.
|
|
*/
|
|
|
|
raidPtr->serial_number = clabel->serial_number;
|
|
|
|
raid_init_component_label(raidPtr, ci_label);
|
|
ci_label->serial_number = clabel->serial_number;
|
|
|
|
for(row=0;row<raidPtr->numRow;row++) {
|
|
ci_label->row = row;
|
|
for(column=0;column<raidPtr->numCol;column++) {
|
|
diskPtr = &raidPtr->Disks[row][column];
|
|
if (!RF_DEAD_DISK(diskPtr->status)) {
|
|
ci_label->partitionSize =
|
|
diskPtr->partitionSize;
|
|
ci_label->column = column;
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[row][column].dev,
|
|
raidPtr->raid_cinfo[row][column].ci_vp,
|
|
ci_label );
|
|
}
|
|
}
|
|
}
|
|
|
|
FREE(ci_label, M_RAIDFRAME);
|
|
return (retcode);
|
|
case RAIDFRAME_SET_AUTOCONFIG:
|
|
d = rf_set_autoconfig(raidPtr, *(int *) data);
|
|
rf_printf(1, "New autoconfig value is: %d\n", d);
|
|
*(int *) data = d;
|
|
return (retcode);
|
|
|
|
case RAIDFRAME_SET_ROOT:
|
|
d = rf_set_rootpartition(raidPtr, *(int *) data);
|
|
rf_printf(1, "New rootpartition value is: %d\n", d);
|
|
*(int *) data = d;
|
|
return (retcode);
|
|
|
|
/* initialize all parity */
|
|
case RAIDFRAME_REWRITEPARITY:
|
|
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* Parity for RAID 0 is trivially correct */
|
|
raidPtr->parity_good = RF_RAID_CLEAN;
|
|
return(0);
|
|
}
|
|
|
|
if (raidPtr->parity_rewrite_in_progress == 1) {
|
|
/* Re-write is already in progress! */
|
|
return(EINVAL);
|
|
}
|
|
|
|
retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
|
|
rf_RewriteParityThread,
|
|
raidPtr,"raid_parity");
|
|
return (retcode);
|
|
|
|
|
|
case RAIDFRAME_ADD_HOT_SPARE:
|
|
MALLOC(hot_spare, RF_SingleComponent_t *,
|
|
sizeof(RF_SingleComponent_t), M_RAIDFRAME,
|
|
M_WAITOK | M_ZERO);
|
|
sparePtr = (RF_SingleComponent_t *) data;
|
|
memcpy( hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
|
|
retcode = rf_add_hot_spare(raidPtr, hot_spare);
|
|
FREE(hot_spare, M_RAIDFRAME);
|
|
return(retcode);
|
|
|
|
case RAIDFRAME_REMOVE_HOT_SPARE:
|
|
return(retcode);
|
|
|
|
case RAIDFRAME_DELETE_COMPONENT:
|
|
MALLOC(component, RF_SingleComponent_t *,
|
|
sizeof(RF_SingleComponent_t), M_RAIDFRAME,
|
|
M_WAITOK | M_ZERO);
|
|
componentPtr = (RF_SingleComponent_t *)data;
|
|
memcpy( component, componentPtr,
|
|
sizeof(RF_SingleComponent_t));
|
|
retcode = rf_delete_component(raidPtr, component);
|
|
FREE(component, M_RAIDFRAME);
|
|
return(retcode);
|
|
|
|
case RAIDFRAME_INCORPORATE_HOT_SPARE:
|
|
MALLOC(component, RF_SingleComponent_t *,
|
|
sizeof(RF_SingleComponent_t), M_RAIDFRAME,
|
|
M_WAITOK | M_ZERO);
|
|
componentPtr = (RF_SingleComponent_t *)data;
|
|
memcpy( component, componentPtr,
|
|
sizeof(RF_SingleComponent_t));
|
|
retcode = rf_incorporate_hot_spare(raidPtr, component);
|
|
FREE(component, M_RAIDFRAME);
|
|
return(retcode);
|
|
|
|
case RAIDFRAME_REBUILD_IN_PLACE:
|
|
|
|
MALLOC(component, RF_SingleComponent_t *,
|
|
sizeof(RF_SingleComponent_t), M_RAIDFRAME,
|
|
M_WAITOK | M_ZERO);
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* Can't do this on a RAID 0!! */
|
|
FREE(component, M_RAIDFRAME);
|
|
return(EINVAL);
|
|
}
|
|
|
|
if (raidPtr->recon_in_progress == 1) {
|
|
/* a reconstruct is already in progress! */
|
|
FREE(component, M_RAIDFRAME);
|
|
return(EINVAL);
|
|
}
|
|
|
|
componentPtr = (RF_SingleComponent_t *) data;
|
|
memcpy( component, componentPtr,
|
|
sizeof(RF_SingleComponent_t));
|
|
row = component->row;
|
|
column = component->column;
|
|
unit = raidPtr->raidid;
|
|
rf_printf(0, "raid%d Rebuild: %d %d\n", unit, row, column);
|
|
if ((row < 0) || (row >= raidPtr->numRow) ||
|
|
(column < 0) || (column >= raidPtr->numCol)) {
|
|
FREE(component, M_RAIDFRAME);
|
|
return(EINVAL);
|
|
}
|
|
|
|
RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
|
|
if (rrcopy == NULL) {
|
|
FREE(component, M_RAIDFRAME);
|
|
return(ENOMEM);
|
|
}
|
|
|
|
rrcopy->raidPtr = (void *) raidPtr;
|
|
rrcopy->row = row;
|
|
rrcopy->col = column;
|
|
|
|
retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
|
|
rf_ReconstructInPlaceThread,
|
|
rrcopy,"raid_reconip");
|
|
FREE(component, M_RAIDFRAME);
|
|
return(retcode);
|
|
|
|
case RAIDFRAME_GET_UNIT:
|
|
|
|
*(int *)data = raidPtr->raidid;
|
|
return (0);
|
|
|
|
case RAIDFRAME_GET_INFO:
|
|
if (!raidPtr->valid)
|
|
return (ENODEV);
|
|
ucfgp = (RF_DeviceConfig_t **) data;
|
|
RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
|
|
(RF_DeviceConfig_t *));
|
|
if (d_cfg == NULL)
|
|
return (ENOMEM);
|
|
bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
|
|
d_cfg->rows = raidPtr->numRow;
|
|
d_cfg->cols = raidPtr->numCol;
|
|
d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
|
|
if (d_cfg->ndevs >= RF_MAX_DISKS) {
|
|
RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
|
|
return (ENOMEM);
|
|
}
|
|
d_cfg->nspares = raidPtr->numSpare;
|
|
if (d_cfg->nspares >= RF_MAX_DISKS) {
|
|
RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
|
|
return (ENOMEM);
|
|
}
|
|
d_cfg->maxqdepth = raidPtr->maxQueueDepth;
|
|
d = 0;
|
|
for (i = 0; i < d_cfg->rows; i++) {
|
|
for (j = 0; j < d_cfg->cols; j++) {
|
|
d_cfg->devs[d] = raidPtr->Disks[i][j];
|
|
d++;
|
|
}
|
|
}
|
|
for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
|
|
d_cfg->spares[i] = raidPtr->Disks[0][j];
|
|
}
|
|
|
|
retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
|
|
|
|
RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
|
|
|
|
return (retcode);
|
|
|
|
case RAIDFRAME_CHECK_PARITY:
|
|
*(int *) data = raidPtr->parity_good;
|
|
return (0);
|
|
|
|
case RAIDFRAME_RESET_ACCTOTALS:
|
|
bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
|
|
return (0);
|
|
|
|
case RAIDFRAME_GET_ACCTOTALS:
|
|
totals = (RF_AccTotals_t *) data;
|
|
*totals = raidPtr->acc_totals;
|
|
return (0);
|
|
|
|
case RAIDFRAME_KEEP_ACCTOTALS:
|
|
raidPtr->keep_acc_totals = *(int *)data;
|
|
return (0);
|
|
|
|
case RAIDFRAME_GET_SIZE:
|
|
*(int *) data = raidPtr->totalSectors;
|
|
return (0);
|
|
|
|
/* fail a disk & optionally start reconstruction */
|
|
case RAIDFRAME_FAIL_DISK:
|
|
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* Can't do this on a RAID 0!! */
|
|
return(EINVAL);
|
|
}
|
|
|
|
rr = (struct rf_recon_req *) data;
|
|
|
|
if (rr->row < 0 || rr->row >= raidPtr->numRow
|
|
|| rr->col < 0 || rr->col >= raidPtr->numCol)
|
|
return (EINVAL);
|
|
|
|
rf_printf(0, "%s: Failing the disk: row: %d col: %d\n",
|
|
dev->si_name, rr->row, rr->col);
|
|
|
|
/* make a copy of the recon request so that we don't rely on
|
|
* the user's buffer */
|
|
RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
|
|
if (rrcopy == NULL)
|
|
return(ENOMEM);
|
|
bcopy(rr, rrcopy, sizeof(*rr));
|
|
rrcopy->raidPtr = (void *) raidPtr;
|
|
|
|
retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
|
|
rf_ReconThread,
|
|
rrcopy,"raid_recon");
|
|
return (0);
|
|
|
|
/* invoke a copyback operation after recon on whatever disk
|
|
* needs it, if any */
|
|
case RAIDFRAME_COPYBACK:
|
|
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* This makes no sense on a RAID 0!! */
|
|
return(EINVAL);
|
|
}
|
|
|
|
if (raidPtr->copyback_in_progress == 1) {
|
|
/* Copyback is already in progress! */
|
|
return(EINVAL);
|
|
}
|
|
|
|
retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
|
|
rf_CopybackThread,
|
|
raidPtr,"raid_copyback");
|
|
return (retcode);
|
|
|
|
/* return the percentage completion of reconstruction */
|
|
case RAIDFRAME_CHECK_RECON_STATUS:
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* This makes no sense on a RAID 0, so tell the
|
|
user it's done. */
|
|
*(int *) data = 100;
|
|
return(0);
|
|
}
|
|
row = 0; /* XXX we only consider a single row... */
|
|
if (raidPtr->status[row] != rf_rs_reconstructing)
|
|
*(int *) data = 100;
|
|
else
|
|
*(int *) data = raidPtr->reconControl[row]->percentComplete;
|
|
return (0);
|
|
case RAIDFRAME_CHECK_RECON_STATUS_EXT:
|
|
row = 0; /* XXX we only consider a single row... */
|
|
if (raidPtr->status[row] != rf_rs_reconstructing) {
|
|
progressInfo.remaining = 0;
|
|
progressInfo.completed = 100;
|
|
progressInfo.total = 100;
|
|
} else {
|
|
progressInfo.total =
|
|
raidPtr->reconControl[row]->numRUsTotal;
|
|
progressInfo.completed =
|
|
raidPtr->reconControl[row]->numRUsComplete;
|
|
progressInfo.remaining = progressInfo.total -
|
|
progressInfo.completed;
|
|
}
|
|
bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
|
|
return (retcode);
|
|
|
|
case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* This makes no sense on a RAID 0, so tell the
|
|
user it's done. */
|
|
*(int *) data = 100;
|
|
return(0);
|
|
}
|
|
if (raidPtr->parity_rewrite_in_progress == 1) {
|
|
*(int *) data = 100 *
|
|
raidPtr->parity_rewrite_stripes_done /
|
|
raidPtr->Layout.numStripe;
|
|
} else {
|
|
*(int *) data = 100;
|
|
}
|
|
return (0);
|
|
|
|
case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
|
|
if (raidPtr->parity_rewrite_in_progress == 1) {
|
|
progressInfo.total = raidPtr->Layout.numStripe;
|
|
progressInfo.completed =
|
|
raidPtr->parity_rewrite_stripes_done;
|
|
progressInfo.remaining = progressInfo.total -
|
|
progressInfo.completed;
|
|
} else {
|
|
progressInfo.remaining = 0;
|
|
progressInfo.completed = 100;
|
|
progressInfo.total = 100;
|
|
}
|
|
bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
|
|
return (retcode);
|
|
|
|
case RAIDFRAME_CHECK_COPYBACK_STATUS:
|
|
if (raidPtr->Layout.map->faultsTolerated == 0) {
|
|
/* This makes no sense on a RAID 0 */
|
|
*(int *) data = 100;
|
|
return(0);
|
|
}
|
|
if (raidPtr->copyback_in_progress == 1) {
|
|
*(int *) data = 100 * raidPtr->copyback_stripes_done /
|
|
raidPtr->Layout.numStripe;
|
|
} else {
|
|
*(int *) data = 100;
|
|
}
|
|
return (0);
|
|
|
|
case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
|
|
if (raidPtr->copyback_in_progress == 1) {
|
|
progressInfo.total = raidPtr->Layout.numStripe;
|
|
progressInfo.completed =
|
|
raidPtr->copyback_stripes_done;
|
|
progressInfo.remaining = progressInfo.total -
|
|
progressInfo.completed;
|
|
} else {
|
|
progressInfo.remaining = 0;
|
|
progressInfo.completed = 100;
|
|
progressInfo.total = 100;
|
|
}
|
|
bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
|
|
return (retcode);
|
|
|
|
/* the sparetable daemon calls this to wait for the kernel to
|
|
* need a spare table. this ioctl does not return until a
|
|
* spare table is needed. XXX -- calling mpsleep here in the
|
|
* ioctl code is almost certainly wrong and evil. -- XXX XXX
|
|
* -- I should either compute the spare table in the kernel,
|
|
* or have a different -- XXX XXX -- interface (a different
|
|
* character device) for delivering the table -- XXX */
|
|
#if 0
|
|
case RAIDFRAME_SPARET_WAIT:
|
|
RF_LOCK_MUTEX(rf_sparet_wait_mutex);
|
|
while (!rf_sparet_wait_queue)
|
|
mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
|
|
waitreq = rf_sparet_wait_queue;
|
|
rf_sparet_wait_queue = rf_sparet_wait_queue->next;
|
|
RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
|
|
|
|
/* structure assignment */
|
|
*((RF_SparetWait_t *) data) = *waitreq;
|
|
|
|
RF_Free(waitreq, sizeof(*waitreq));
|
|
return (0);
|
|
|
|
/* wakes up a process waiting on SPARET_WAIT and puts an error
|
|
* code in it that will cause the dameon to exit */
|
|
case RAIDFRAME_ABORT_SPARET_WAIT:
|
|
RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
|
|
waitreq->fcol = -1;
|
|
RF_LOCK_MUTEX(rf_sparet_wait_mutex);
|
|
waitreq->next = rf_sparet_wait_queue;
|
|
rf_sparet_wait_queue = waitreq;
|
|
RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
|
|
wakeup(&rf_sparet_wait_queue);
|
|
return (0);
|
|
|
|
/* used by the spare table daemon to deliver a spare table
|
|
* into the kernel */
|
|
case RAIDFRAME_SEND_SPARET:
|
|
|
|
/* install the spare table */
|
|
retcode = rf_SetSpareTable(raidPtr, *(void **) data);
|
|
|
|
/* respond to the requestor. the return status of the spare
|
|
* table installation is passed in the "fcol" field */
|
|
RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
|
|
waitreq->fcol = retcode;
|
|
RF_LOCK_MUTEX(rf_sparet_wait_mutex);
|
|
waitreq->next = rf_sparet_resp_queue;
|
|
rf_sparet_resp_queue = waitreq;
|
|
wakeup(&rf_sparet_resp_queue);
|
|
RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
|
|
|
|
return (retcode);
|
|
#endif
|
|
|
|
default:
|
|
retcode = ENOIOCTL;
|
|
break; /* fall through to the os-specific code below */
|
|
|
|
}
|
|
|
|
return (retcode);
|
|
|
|
}
|
|
|
|
|
|
/* raidinit -- complete the rest of the initialization for the
|
|
RAIDframe device. */
|
|
|
|
|
|
static dev_t
|
|
raidinit(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
struct raid_softc *sc;
|
|
dev_t diskdev;
|
|
|
|
RF_Malloc(sc, sizeof(struct raid_softc), (struct raid_softc *));
|
|
if (sc == NULL) {
|
|
rf_printf(1, "No memory for raid device\n");
|
|
return(NULL);
|
|
}
|
|
|
|
sc->raidPtr = raidPtr;
|
|
|
|
/* XXX Should check return code here */
|
|
bioq_init(&sc->bio_queue);
|
|
sc->sc_cbufpool = uma_zcreate("raidpl", sizeof(struct raidbuf), NULL,
|
|
NULL, NULL, NULL, 0, 0);
|
|
|
|
/* XXX There may be a weird interaction here between this, and
|
|
* protectedSectors, as used in RAIDframe. */
|
|
|
|
sc->sc_size = raidPtr->totalSectors;
|
|
|
|
/* Create the disk device */
|
|
diskdev = disk_create(raidPtr->raidid, &sc->sc_dkdev, 0, &raid_cdevsw,
|
|
&raiddisk_cdevsw);
|
|
if (diskdev == NODEV) {
|
|
rf_printf(1, "disk_create failed\n");
|
|
return (NULL);
|
|
}
|
|
sc->sc_dkdev.d_dev->si_drv1 = sc;
|
|
sc->sc_dev = diskdev;
|
|
raidPtr->sc = sc;
|
|
|
|
/* Register with devstat */
|
|
devstat_add_entry(&sc->device_stats, "raid", raidPtr->raidid, 0,
|
|
DEVSTAT_NO_BLOCKSIZE | DEVSTAT_NO_ORDERED_TAGS,
|
|
DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_ARRAY);
|
|
|
|
return (diskdev);
|
|
}
|
|
|
|
/* wake up the daemon & tell it to get us a spare table
|
|
* XXX
|
|
* the entries in the queues should be tagged with the raidPtr
|
|
* so that in the extremely rare case that two recons happen at once,
|
|
* we know for which device were requesting a spare table
|
|
* XXX
|
|
*
|
|
* XXX This code is not currently used. GO
|
|
*/
|
|
int
|
|
rf_GetSpareTableFromDaemon(req)
|
|
RF_SparetWait_t *req;
|
|
{
|
|
int retcode;
|
|
|
|
RF_LOCK_MUTEX(rf_sparet_wait_mutex);
|
|
req->next = rf_sparet_wait_queue;
|
|
rf_sparet_wait_queue = req;
|
|
wakeup(&rf_sparet_wait_queue);
|
|
|
|
/* mpsleep unlocks the mutex */
|
|
while (!rf_sparet_resp_queue) {
|
|
tsleep(&rf_sparet_resp_queue, PRIBIO,
|
|
"raidframe getsparetable", 0);
|
|
}
|
|
req = rf_sparet_resp_queue;
|
|
rf_sparet_resp_queue = req->next;
|
|
RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
|
|
|
|
retcode = req->fcol;
|
|
RF_Free(req, sizeof(*req)); /* this is not the same req as we
|
|
* alloc'd */
|
|
return (retcode);
|
|
}
|
|
|
|
/* a wrapper around rf_DoAccess that extracts appropriate info from the
|
|
* bp & passes it down.
|
|
* any calls originating in the kernel must use non-blocking I/O
|
|
* do some extra sanity checking to return "appropriate" error values for
|
|
* certain conditions (to make some standard utilities work)
|
|
*
|
|
* Formerly known as: rf_DoAccessKernel
|
|
*/
|
|
void
|
|
raidstart(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
RF_SectorCount_t num_blocks, pb, sum;
|
|
RF_RaidAddr_t raid_addr;
|
|
struct raid_softc *sc;
|
|
struct bio *bp;
|
|
daddr_t blocknum;
|
|
int unit, retcode, do_async;
|
|
|
|
unit = raidPtr->raidid;
|
|
sc = raidPtr->sc;
|
|
|
|
/* quick check to see if anything has died recently */
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
if (raidPtr->numNewFailures > 0) {
|
|
raidPtr->numNewFailures--;
|
|
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
|
rf_update_component_labels(raidPtr,
|
|
RF_NORMAL_COMPONENT_UPDATE);
|
|
} else
|
|
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
|
|
|
/* Check to see if we're at the limit... */
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
while (raidPtr->openings > 0) {
|
|
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
|
|
|
/* get the next item, if any, from the queue */
|
|
if ((bp = bioq_first(&sc->bio_queue)) == NULL) {
|
|
/* nothing more to do */
|
|
return;
|
|
}
|
|
bioq_remove(&sc->bio_queue, bp);
|
|
|
|
/* Ok, for the bp we have here, bp->b_blkno is relative to the
|
|
* partition.. Need to make it absolute to the underlying
|
|
* device.. */
|
|
|
|
blocknum = bp->bio_blkno;
|
|
#if 0 /* XXX Is this needed? */
|
|
if (DISKPART(bp->bio_dev) != RAW_PART) {
|
|
struct partition *pp;
|
|
pp = &sc->sc_dkdev.d_label.d_partitions[DISKPART(
|
|
bp->bio_dev)];
|
|
blocknum += pp->p_offset;
|
|
}
|
|
#endif
|
|
|
|
rf_printf(3, "Blocks: %ld, %ld\n", (long)bp->bio_blkno, (long)blocknum);
|
|
|
|
rf_printf(3, "bp->bio_bcount = %d\n", (int) bp->bio_bcount);
|
|
rf_printf(3, "bp->bio_resid = %d\n", (int) bp->bio_resid);
|
|
|
|
/* *THIS* is where we adjust what block we're going to...
|
|
* but DO NOT TOUCH bp->bio_blkno!!! */
|
|
raid_addr = blocknum;
|
|
|
|
num_blocks = bp->bio_bcount >> raidPtr->logBytesPerSector;
|
|
pb = (bp->bio_bcount & raidPtr->sectorMask) ? 1 : 0;
|
|
sum = raid_addr + num_blocks + pb;
|
|
if (rf_debugKernelAccess) {
|
|
rf_printf(0, "raid_addr=0x%x sum=%d num_blocks=%d(+%d) "
|
|
"(%d)\n", (int)raid_addr, (int)sum,
|
|
(int)num_blocks, (int)pb,
|
|
(int)bp->bio_resid);
|
|
}
|
|
if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
|
|
|| (sum < num_blocks) || (sum < pb)) {
|
|
bp->bio_error = ENOSPC;
|
|
bp->bio_flags |= BIO_ERROR;
|
|
bp->bio_resid = bp->bio_bcount;
|
|
biodone(bp);
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
continue;
|
|
}
|
|
/*
|
|
* XXX rf_DoAccess() should do this, not just DoAccessKernel()
|
|
*/
|
|
|
|
if (bp->bio_bcount & raidPtr->sectorMask) {
|
|
bp->bio_error = EINVAL;
|
|
bp->bio_flags |= BIO_ERROR;
|
|
bp->bio_resid = bp->bio_bcount;
|
|
biodone(bp);
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
continue;
|
|
|
|
}
|
|
rf_printf(3, "Calling DoAccess..\n");
|
|
|
|
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
raidPtr->openings--;
|
|
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
|
|
|
/*
|
|
* Everything is async.
|
|
*/
|
|
do_async = 1;
|
|
|
|
devstat_start_transaction(&sc->device_stats);
|
|
|
|
/* XXX we're still at splbio() here... do we *really*
|
|
need to be? */
|
|
|
|
/* don't ever condition on bp->bio_cmd & BIO_WRITE.
|
|
* always condition on BIO_READ instead */
|
|
|
|
retcode = rf_DoAccess(raidPtr, (bp->bio_cmd & BIO_READ) ?
|
|
RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
|
|
do_async, raid_addr, num_blocks,
|
|
bp->bio_data, bp, NULL, NULL,
|
|
RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
|
|
|
|
|
|
RF_LOCK_MUTEX(raidPtr->mutex);
|
|
}
|
|
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
|
}
|
|
|
|
|
|
|
|
|
|
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
|
|
|
|
int
|
|
rf_DispatchKernelIO(queue, req)
|
|
RF_DiskQueue_t *queue;
|
|
RF_DiskQueueData_t *req;
|
|
{
|
|
int op = (req->type == RF_IO_TYPE_READ) ? BIO_READ : BIO_WRITE;
|
|
struct bio *bp;
|
|
struct raidbuf *raidbp = NULL;
|
|
struct raid_softc *sc;
|
|
|
|
/* XXX along with the vnode, we also need the softc associated with
|
|
* this device.. */
|
|
|
|
req->queue = queue;
|
|
|
|
sc = queue->raidPtr->sc;
|
|
|
|
rf_printf(3, "DispatchKernelIO %s\n", sc->sc_dev->si_name);
|
|
|
|
bp = req->bp;
|
|
#if 1
|
|
/* XXX when there is a physical disk failure, someone is passing us a
|
|
* buffer that contains old stuff!! Attempt to deal with this problem
|
|
* without taking a performance hit... (not sure where the real bug
|
|
* is. It's buried in RAIDframe somewhere) :-( GO ) */
|
|
|
|
if (bp->bio_flags & BIO_ERROR) {
|
|
bp->bio_flags &= ~BIO_ERROR;
|
|
}
|
|
if (bp->bio_error != 0) {
|
|
bp->bio_error = 0;
|
|
}
|
|
#endif
|
|
raidbp = RAIDGETBUF(sc);
|
|
|
|
raidbp->rf_flags = 0; /* XXX not really used anywhere... */
|
|
|
|
/*
|
|
* context for raidiodone
|
|
*/
|
|
raidbp->rf_obp = bp;
|
|
raidbp->req = req;
|
|
|
|
#if 0 /* XXX */
|
|
LIST_INIT(&raidbp->rf_buf.b_dep);
|
|
#endif
|
|
|
|
switch (req->type) {
|
|
case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
|
|
/* XXX need to do something extra here.. */
|
|
/* I'm leaving this in, as I've never actually seen it used,
|
|
* and I'd like folks to report it... GO */
|
|
rf_printf(2, "WAKEUP CALLED\n");
|
|
queue->numOutstanding++;
|
|
|
|
/* XXX need to glue the original buffer into this? */
|
|
|
|
KernelWakeupFunc(&raidbp->rf_buf);
|
|
break;
|
|
|
|
case RF_IO_TYPE_READ:
|
|
case RF_IO_TYPE_WRITE:
|
|
|
|
if (req->tracerec) {
|
|
RF_ETIMER_START(req->tracerec->timer);
|
|
}
|
|
InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
|
|
op | bp->bio_cmd, queue->rf_cinfo->ci_dev,
|
|
req->sectorOffset, req->numSector,
|
|
req->buf, KernelWakeupFunc, (void *) req,
|
|
queue->raidPtr->logBytesPerSector, req->b_proc);
|
|
|
|
if (rf_debugKernelAccess) {
|
|
rf_printf(0, "dispatch: bp->bio_blkno = %ld\n",
|
|
(long) bp->bio_blkno);
|
|
}
|
|
queue->numOutstanding++;
|
|
queue->last_deq_sector = req->sectorOffset;
|
|
/* acc wouldn't have been let in if there were any pending
|
|
* reqs at any other priority */
|
|
queue->curPriority = req->priority;
|
|
|
|
rf_printf(3, "Going for %c to %s row %d col %d\n",
|
|
req->type, sc->sc_dev->si_name, queue->row, queue->col);
|
|
rf_printf(3, "sector %d count %d (%d bytes) %d\n",
|
|
(int) req->sectorOffset, (int) req->numSector,
|
|
(int) (req->numSector <<
|
|
queue->raidPtr->logBytesPerSector),
|
|
(int) queue->raidPtr->logBytesPerSector);
|
|
#if 0 /* XXX */
|
|
if ((raidbp->rf_buf.bio_cmd & BIO_READ) == 0) {
|
|
raidbp->rf_buf.b_vp->v_numoutput++;
|
|
}
|
|
#endif
|
|
BIO_STRATEGY(&raidbp->rf_buf);
|
|
|
|
break;
|
|
|
|
default:
|
|
panic("bad req->type in rf_DispatchKernelIO");
|
|
}
|
|
rf_printf(3, "Exiting from DispatchKernelIO\n");
|
|
/* splx(s); */ /* want to test this */
|
|
return (0);
|
|
}
|
|
/* This is the callback function associated with an I/O invoked from
|
|
kernel code.
|
|
*/
|
|
static void
|
|
KernelWakeupFunc(vbp)
|
|
struct bio *vbp;
|
|
{
|
|
RF_DiskQueueData_t *req = NULL;
|
|
RF_DiskQueue_t *queue;
|
|
struct raidbuf *raidbp = (struct raidbuf *) vbp;
|
|
struct bio *bp;
|
|
struct raid_softc *sc;
|
|
int s;
|
|
|
|
s = splbio();
|
|
rf_printf(2, "recovering the request queue:\n");
|
|
req = raidbp->req;
|
|
|
|
bp = raidbp->rf_obp;
|
|
queue = (RF_DiskQueue_t *) req->queue;
|
|
sc = queue->raidPtr->sc;
|
|
|
|
if (raidbp->rf_buf.bio_flags & BIO_ERROR) {
|
|
bp->bio_flags |= BIO_ERROR;
|
|
bp->bio_error = raidbp->rf_buf.bio_error ?
|
|
raidbp->rf_buf.bio_error : EIO;
|
|
}
|
|
|
|
/* XXX methinks this could be wrong... */
|
|
#if 1
|
|
bp->bio_resid = raidbp->rf_buf.bio_resid;
|
|
#endif
|
|
|
|
if (req->tracerec) {
|
|
RF_ETIMER_STOP(req->tracerec->timer);
|
|
RF_ETIMER_EVAL(req->tracerec->timer);
|
|
RF_LOCK_MUTEX(rf_tracing_mutex);
|
|
req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
|
|
req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
|
|
req->tracerec->num_phys_ios++;
|
|
RF_UNLOCK_MUTEX(rf_tracing_mutex);
|
|
}
|
|
bp->bio_bcount = raidbp->rf_buf.bio_bcount; /* XXXX ? */
|
|
|
|
/* XXX Ok, let's get aggressive... If BIO_ERROR is set, let's go
|
|
* ballistic, and mark the component as hosed... */
|
|
|
|
if (bp->bio_flags & BIO_ERROR) {
|
|
/* Mark the disk as dead */
|
|
/* but only mark it once... */
|
|
if (queue->raidPtr->Disks[queue->row][queue->col].status ==
|
|
rf_ds_optimal) {
|
|
rf_printf(0, "%s: IO Error. Marking %s as "
|
|
"failed.\n", sc->sc_dev->si_name, queue->raidPtr->
|
|
Disks[queue->row][queue->col].devname);
|
|
queue->raidPtr->Disks[queue->row][queue->col].status =
|
|
rf_ds_failed;
|
|
queue->raidPtr->status[queue->row] = rf_rs_degraded;
|
|
queue->raidPtr->numFailures++;
|
|
queue->raidPtr->numNewFailures++;
|
|
} else { /* Disk is already dead... */
|
|
/* printf("Disk already marked as dead!\n"); */
|
|
}
|
|
|
|
}
|
|
|
|
RAIDPUTBUF(sc, raidbp);
|
|
|
|
rf_DiskIOComplete(queue, req, (bp->bio_flags & BIO_ERROR) ? 1 : 0);
|
|
(req->CompleteFunc)(req->argument, (bp->bio_flags & BIO_ERROR) ? 1 : 0);
|
|
|
|
splx(s);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* initialize a buf structure for doing an I/O in the kernel.
|
|
*/
|
|
static void
|
|
InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
|
|
logBytesPerSector, b_proc)
|
|
struct bio *bp;
|
|
struct vnode *b_vp;
|
|
unsigned rw_flag;
|
|
dev_t dev;
|
|
RF_SectorNum_t startSect;
|
|
RF_SectorCount_t numSect;
|
|
caddr_t buf;
|
|
void (*cbFunc) (struct bio *);
|
|
void *cbArg;
|
|
int logBytesPerSector;
|
|
struct proc *b_proc;
|
|
{
|
|
/* bp->b_flags = B_PHYS | rw_flag; */
|
|
bp->bio_cmd = rw_flag; /* XXX need B_PHYS here too? */
|
|
bp->bio_bcount = numSect << logBytesPerSector;
|
|
#if 0 /* XXX */
|
|
bp->bio_bufsize = bp->bio_bcount;
|
|
#endif
|
|
bp->bio_error = 0;
|
|
bp->bio_dev = dev;
|
|
bp->bio_data = buf;
|
|
bp->bio_blkno = startSect;
|
|
bp->bio_resid = bp->bio_bcount; /* XXX is this right!?!?!! */
|
|
if (bp->bio_bcount == 0) {
|
|
panic("bp->bio_bcount is zero in InitBP!!\n");
|
|
}
|
|
/*
|
|
bp->b_proc = b_proc;
|
|
bp->b_vp = b_vp;
|
|
*/
|
|
bp->bio_done = cbFunc;
|
|
|
|
}
|
|
|
|
static void
|
|
raidgetdefaultlabel(raidPtr, sc, dp)
|
|
RF_Raid_t *raidPtr;
|
|
struct raid_softc *sc;
|
|
struct disk *dp;
|
|
{
|
|
rf_printf(1, "Building a default label...\n");
|
|
if (dp == NULL)
|
|
panic("raidgetdefaultlabel(): dp is NULL\n");
|
|
|
|
/* fabricate a label... */
|
|
dp->d_mediasize = raidPtr->totalSectors * raidPtr->bytesPerSector;
|
|
dp->d_sectorsize = raidPtr->bytesPerSector;
|
|
dp->d_fwsectors = raidPtr->Layout.dataSectorsPerStripe;
|
|
dp->d_fwheads = 4 * raidPtr->numCol;
|
|
|
|
}
|
|
/*
|
|
* Lookup the provided name in the filesystem. If the file exists,
|
|
* is a valid block device, and isn't being used by anyone else,
|
|
* set *vpp to the file's vnode.
|
|
* You'll find the original of this in ccd.c
|
|
*/
|
|
int
|
|
raidlookup(path, td, vpp)
|
|
char *path;
|
|
struct thread *td;
|
|
struct vnode **vpp; /* result */
|
|
{
|
|
struct nameidata *nd;
|
|
struct vnode *vp;
|
|
struct vattr *va;
|
|
struct proc *p;
|
|
int error = 0, flags;
|
|
|
|
MALLOC(nd, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_NOWAIT | M_ZERO);
|
|
MALLOC(va, struct vattr *, sizeof(struct vattr), M_TEMP, M_NOWAIT | M_ZERO);
|
|
if ((nd == NULL) || (va == NULL)) {
|
|
printf("Out of memory?\n");
|
|
return (ENOMEM);
|
|
}
|
|
|
|
/* Sanity check the p_fd fields. This is really just a hack */
|
|
p = td->td_proc;
|
|
if (!p->p_fd->fd_rdir || !p->p_fd->fd_cdir)
|
|
printf("Warning: p_fd fields not set\n");
|
|
|
|
if (!td->td_proc->p_fd->fd_rdir)
|
|
p->p_fd->fd_rdir = rootvnode;
|
|
|
|
if (!p->p_fd->fd_cdir)
|
|
p->p_fd->fd_cdir = rootvnode;
|
|
|
|
NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, curthread);
|
|
flags = FREAD | FWRITE;
|
|
if ((error = vn_open(nd, &flags, 0)) != 0) {
|
|
rf_printf(2, "RAIDframe: vn_open returned %d\n", error);
|
|
goto end1;
|
|
}
|
|
vp = nd->ni_vp;
|
|
if (vp->v_usecount > 1) {
|
|
rf_printf(1, "raidlookup() vp->v_usecount= %d\n", vp->v_usecount);
|
|
error = EBUSY;
|
|
goto end;
|
|
}
|
|
if ((error = VOP_GETATTR(vp, va, td->td_ucred, td)) != 0) {
|
|
rf_printf(1, "raidlookup() VOP_GETATTR returned %d", error);
|
|
goto end;
|
|
}
|
|
/* XXX: eventually we should handle VREG, too. */
|
|
if (va->va_type != VCHR) {
|
|
rf_printf(1, "Returning ENOTBLK\n");
|
|
error = ENOTBLK;
|
|
}
|
|
*vpp = vp;
|
|
|
|
end:
|
|
VOP_UNLOCK(vp, 0, td);
|
|
NDFREE(nd, NDF_ONLY_PNBUF);
|
|
end1:
|
|
FREE(nd, M_TEMP);
|
|
FREE(va, M_TEMP);
|
|
return (error);
|
|
}
|
|
/*
|
|
* Wait interruptibly for an exclusive lock.
|
|
*
|
|
* XXX
|
|
* Several drivers do this; it should be abstracted and made MP-safe.
|
|
* (Hmm... where have we seen this warning before :-> GO )
|
|
*/
|
|
static int
|
|
raidlock(sc)
|
|
struct raid_softc *sc;
|
|
{
|
|
int error;
|
|
|
|
while ((sc->sc_flags & RAIDF_LOCKED) != 0) {
|
|
sc->sc_flags |= RAIDF_WANTED;
|
|
if ((error =
|
|
tsleep(sc, PRIBIO | PCATCH, "raidlck", 0)) != 0)
|
|
return (error);
|
|
}
|
|
sc->sc_flags |= RAIDF_LOCKED;
|
|
return (0);
|
|
}
|
|
/*
|
|
* Unlock and wake up any waiters.
|
|
*/
|
|
static void
|
|
raidunlock(sc)
|
|
struct raid_softc *sc;
|
|
{
|
|
|
|
sc->sc_flags &= ~RAIDF_LOCKED;
|
|
if ((sc->sc_flags & RAIDF_WANTED) != 0) {
|
|
sc->sc_flags &= ~RAIDF_WANTED;
|
|
wakeup(sc);
|
|
}
|
|
}
|
|
|
|
|
|
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
|
|
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
|
|
|
|
int
|
|
raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_NOWAIT | M_ZERO);
|
|
if (clabel == NULL) {
|
|
printf("raidmarkclean: Out of memory?\n");
|
|
return (ENOMEM);
|
|
}
|
|
|
|
raidread_component_label(dev, b_vp, clabel);
|
|
clabel->mod_counter = mod_counter;
|
|
clabel->clean = RF_RAID_CLEAN;
|
|
raidwrite_component_label(dev, b_vp, clabel);
|
|
FREE(clabel, M_RAIDFRAME);
|
|
return(0);
|
|
}
|
|
|
|
|
|
int
|
|
raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_NOWAIT | M_ZERO);
|
|
if (clabel == NULL) {
|
|
printf("raidmarkclean: Out of memory?\n");
|
|
return (ENOMEM);
|
|
}
|
|
|
|
raidread_component_label(dev, b_vp, clabel);
|
|
clabel->mod_counter = mod_counter;
|
|
clabel->clean = RF_RAID_DIRTY;
|
|
raidwrite_component_label(dev, b_vp, clabel);
|
|
FREE(clabel, M_RAIDFRAME);
|
|
return(0);
|
|
}
|
|
|
|
/* ARGSUSED */
|
|
int
|
|
raidread_component_label(dev, b_vp, clabel)
|
|
dev_t dev;
|
|
struct vnode *b_vp;
|
|
RF_ComponentLabel_t *clabel;
|
|
{
|
|
struct buf *bp;
|
|
int error;
|
|
|
|
/* XXX should probably ensure that we don't try to do this if
|
|
someone has changed rf_protected_sectors. */
|
|
|
|
if (b_vp == NULL) {
|
|
/* For whatever reason, this component is not valid.
|
|
Don't try to read a component label from it. */
|
|
return(EINVAL);
|
|
}
|
|
|
|
/* get a block of the appropriate size... */
|
|
bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
|
|
bp->b_dev = dev;
|
|
|
|
/* get our ducks in a row for the read */
|
|
bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
|
|
bp->b_bcount = RF_COMPONENT_INFO_SIZE;
|
|
bp->b_iocmd = BIO_READ;
|
|
bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
|
|
|
|
DEV_STRATEGY(bp);
|
|
error = bufwait(bp);
|
|
|
|
if (!error) {
|
|
memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t));
|
|
#if 0
|
|
rf_print_component_label( clabel );
|
|
#endif
|
|
} else {
|
|
#if 0
|
|
rf_printf(0, "Failed to read RAID component label!\n");
|
|
#endif
|
|
}
|
|
|
|
bp->b_flags |= B_INVAL | B_AGE;
|
|
brelse(bp);
|
|
return(error);
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
raidwrite_component_label(dev, b_vp, clabel)
|
|
dev_t dev;
|
|
struct vnode *b_vp;
|
|
RF_ComponentLabel_t *clabel;
|
|
{
|
|
struct buf *bp;
|
|
int error;
|
|
|
|
/* get a block of the appropriate size... */
|
|
bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
|
|
bp->b_dev = dev;
|
|
|
|
/* get our ducks in a row for the write */
|
|
bp->b_flags = 0;
|
|
bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
|
|
bp->b_bcount = RF_COMPONENT_INFO_SIZE;
|
|
bp->b_iocmd = BIO_WRITE;
|
|
bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
|
|
|
|
memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
|
|
|
|
memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
|
|
|
|
DEV_STRATEGY(bp);
|
|
error = bufwait(bp);
|
|
|
|
bp->b_flags |= B_INVAL | B_AGE;
|
|
brelse(bp);
|
|
if (error) {
|
|
#if 1
|
|
rf_printf(0, "Failed to write RAID component info!\n");
|
|
rf_printf(0, "b_error= %d\n", bp->b_error);
|
|
#endif
|
|
}
|
|
|
|
return(error);
|
|
}
|
|
|
|
void
|
|
rf_markalldirty(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
int r,c;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_NOWAIT | M_ZERO);
|
|
|
|
if (clabel == NULL) {
|
|
printf("rf_markalldirty: Out of memory?\n");
|
|
return;
|
|
}
|
|
|
|
raidPtr->mod_counter++;
|
|
for (r = 0; r < raidPtr->numRow; r++) {
|
|
for (c = 0; c < raidPtr->numCol; c++) {
|
|
/* we don't want to touch (at all) a disk that has
|
|
failed */
|
|
if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
|
|
raidread_component_label(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
clabel);
|
|
if (clabel->status == rf_ds_spared) {
|
|
/* XXX do something special...
|
|
but whatever you do, don't
|
|
try to access it!! */
|
|
} else {
|
|
#if 0
|
|
clabel->status =
|
|
raidPtr->Disks[r][c].status;
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
clabel);
|
|
#endif
|
|
raidmarkdirty(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
raidPtr->mod_counter);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/* printf("Component labels marked dirty.\n"); */
|
|
#if 0
|
|
for( c = 0; c < raidPtr->numSpare ; c++) {
|
|
sparecol = raidPtr->numCol + c;
|
|
if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
|
|
/*
|
|
|
|
XXX this is where we get fancy and map this spare
|
|
into it's correct spot in the array.
|
|
|
|
*/
|
|
/*
|
|
|
|
we claim this disk is "optimal" if it's
|
|
rf_ds_used_spare, as that means it should be
|
|
directly substitutable for the disk it replaced.
|
|
We note that too...
|
|
|
|
*/
|
|
|
|
for(i=0;i<raidPtr->numRow;i++) {
|
|
for(j=0;j<raidPtr->numCol;j++) {
|
|
if ((raidPtr->Disks[i][j].spareRow ==
|
|
r) &&
|
|
(raidPtr->Disks[i][j].spareCol ==
|
|
sparecol)) {
|
|
srow = r;
|
|
scol = sparecol;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
raidread_component_label(
|
|
raidPtr->Disks[r][sparecol].dev,
|
|
raidPtr->raid_cinfo[r][sparecol].ci_vp,
|
|
&clabel);
|
|
/* make sure status is noted */
|
|
clabel.version = RF_COMPONENT_LABEL_VERSION;
|
|
clabel.mod_counter = raidPtr->mod_counter;
|
|
clabel.serial_number = raidPtr->serial_number;
|
|
clabel.row = srow;
|
|
clabel.column = scol;
|
|
clabel.num_rows = raidPtr->numRow;
|
|
clabel.num_columns = raidPtr->numCol;
|
|
clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
|
|
clabel.status = rf_ds_optimal;
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[r][sparecol].dev,
|
|
raidPtr->raid_cinfo[r][sparecol].ci_vp,
|
|
&clabel);
|
|
raidmarkclean( raidPtr->Disks[r][sparecol].dev,
|
|
raidPtr->raid_cinfo[r][sparecol].ci_vp);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
FREE(clabel, M_RAIDFRAME);
|
|
}
|
|
|
|
|
|
void
|
|
rf_update_component_labels(raidPtr, final)
|
|
RF_Raid_t *raidPtr;
|
|
int final;
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
int sparecol;
|
|
int r,c;
|
|
int i,j;
|
|
int srow, scol;
|
|
|
|
srow = -1;
|
|
scol = -1;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_NOWAIT | M_ZERO);
|
|
if (clabel == NULL) {
|
|
printf("rf_update_component_labels: Out of memory?\n");
|
|
return;
|
|
}
|
|
|
|
/* XXX should do extra checks to make sure things really are clean,
|
|
rather than blindly setting the clean bit... */
|
|
|
|
raidPtr->mod_counter++;
|
|
|
|
for (r = 0; r < raidPtr->numRow; r++) {
|
|
for (c = 0; c < raidPtr->numCol; c++) {
|
|
if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
|
|
raidread_component_label(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
clabel);
|
|
/* make sure status is noted */
|
|
clabel->status = rf_ds_optimal;
|
|
/* bump the counter */
|
|
clabel->mod_counter = raidPtr->mod_counter;
|
|
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
clabel);
|
|
if (final == RF_FINAL_COMPONENT_UPDATE) {
|
|
if (raidPtr->parity_good == RF_RAID_CLEAN) {
|
|
raidmarkclean(
|
|
raidPtr->Disks[r][c].dev,
|
|
raidPtr->raid_cinfo[r][c].ci_vp,
|
|
raidPtr->mod_counter);
|
|
}
|
|
}
|
|
}
|
|
/* else we don't touch it.. */
|
|
}
|
|
}
|
|
|
|
for( c = 0; c < raidPtr->numSpare ; c++) {
|
|
sparecol = raidPtr->numCol + c;
|
|
if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
|
|
/*
|
|
|
|
we claim this disk is "optimal" if it's
|
|
rf_ds_used_spare, as that means it should be
|
|
directly substitutable for the disk it replaced.
|
|
We note that too...
|
|
|
|
*/
|
|
|
|
for(i=0;i<raidPtr->numRow;i++) {
|
|
for(j=0;j<raidPtr->numCol;j++) {
|
|
if ((raidPtr->Disks[i][j].spareRow ==
|
|
0) &&
|
|
(raidPtr->Disks[i][j].spareCol ==
|
|
sparecol)) {
|
|
srow = i;
|
|
scol = j;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* XXX shouldn't *really* need this... */
|
|
raidread_component_label(
|
|
raidPtr->Disks[0][sparecol].dev,
|
|
raidPtr->raid_cinfo[0][sparecol].ci_vp,
|
|
clabel);
|
|
/* make sure status is noted */
|
|
|
|
raid_init_component_label(raidPtr, clabel);
|
|
|
|
clabel->mod_counter = raidPtr->mod_counter;
|
|
clabel->row = srow;
|
|
clabel->column = scol;
|
|
clabel->status = rf_ds_optimal;
|
|
|
|
raidwrite_component_label(
|
|
raidPtr->Disks[0][sparecol].dev,
|
|
raidPtr->raid_cinfo[0][sparecol].ci_vp,
|
|
clabel);
|
|
if (final == RF_FINAL_COMPONENT_UPDATE) {
|
|
if (raidPtr->parity_good == RF_RAID_CLEAN) {
|
|
raidmarkclean( raidPtr->Disks[0][sparecol].dev,
|
|
raidPtr->raid_cinfo[0][sparecol].ci_vp,
|
|
raidPtr->mod_counter);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
FREE(clabel, M_RAIDFRAME);
|
|
rf_printf(1, "Component labels updated\n");
|
|
}
|
|
|
|
void
|
|
rf_close_component(raidPtr, vp, auto_configured)
|
|
RF_Raid_t *raidPtr;
|
|
struct vnode *vp;
|
|
int auto_configured;
|
|
{
|
|
struct thread *td;
|
|
|
|
td = raidPtr->engine_thread;
|
|
|
|
if (vp != NULL) {
|
|
if (auto_configured == 1) {
|
|
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
|
|
|
|
vrele(vp);
|
|
} else {
|
|
vn_close(vp, FREAD | FWRITE, td->td_ucred, td);
|
|
}
|
|
} else {
|
|
rf_printf(1, "vnode was NULL\n");
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
rf_UnconfigureVnodes(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
int r,c;
|
|
struct thread *td;
|
|
struct vnode *vp;
|
|
int acd;
|
|
|
|
|
|
/* We take this opportunity to close the vnodes like we should.. */
|
|
|
|
td = raidPtr->engine_thread;
|
|
|
|
for (r = 0; r < raidPtr->numRow; r++) {
|
|
for (c = 0; c < raidPtr->numCol; c++) {
|
|
rf_printf(1, "Closing vnode for row: %d col: %d\n", r, c);
|
|
vp = raidPtr->raid_cinfo[r][c].ci_vp;
|
|
acd = raidPtr->Disks[r][c].auto_configured;
|
|
rf_close_component(raidPtr, vp, acd);
|
|
raidPtr->raid_cinfo[r][c].ci_vp = NULL;
|
|
raidPtr->Disks[r][c].auto_configured = 0;
|
|
}
|
|
}
|
|
for (r = 0; r < raidPtr->numSpare; r++) {
|
|
rf_printf(1, "Closing vnode for spare: %d\n", r);
|
|
vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
|
|
acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
|
|
rf_close_component(raidPtr, vp, acd);
|
|
raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
|
|
raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
rf_ReconThread(req)
|
|
struct rf_recon_req *req;
|
|
{
|
|
RF_Raid_t *raidPtr;
|
|
|
|
mtx_lock(&Giant);
|
|
raidPtr = (RF_Raid_t *) req->raidPtr;
|
|
raidPtr->recon_in_progress = 1;
|
|
|
|
rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
|
|
((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
|
|
|
|
/* XXX get rid of this! we don't need it at all.. */
|
|
RF_Free(req, sizeof(*req));
|
|
|
|
raidPtr->recon_in_progress = 0;
|
|
|
|
/* That's all... */
|
|
RF_THREAD_EXIT(0); /* does not return */
|
|
}
|
|
|
|
void
|
|
rf_RewriteParityThread(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
int retcode;
|
|
|
|
mtx_lock(&Giant);
|
|
raidPtr->parity_rewrite_in_progress = 1;
|
|
retcode = rf_RewriteParity(raidPtr);
|
|
if (retcode) {
|
|
rf_printf(0, "raid%d: Error re-writing parity!\n",raidPtr->raidid);
|
|
} else {
|
|
/* set the clean bit! If we shutdown correctly,
|
|
the clean bit on each component label will get
|
|
set */
|
|
raidPtr->parity_good = RF_RAID_CLEAN;
|
|
}
|
|
raidPtr->parity_rewrite_in_progress = 0;
|
|
|
|
/* Anyone waiting for us to stop? If so, inform them... */
|
|
if (raidPtr->waitShutdown) {
|
|
wakeup(&raidPtr->parity_rewrite_in_progress);
|
|
}
|
|
|
|
/* That's all... */
|
|
RF_THREAD_EXIT(0); /* does not return */
|
|
}
|
|
|
|
|
|
void
|
|
rf_CopybackThread(raidPtr)
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
mtx_lock(&Giant);
|
|
raidPtr->copyback_in_progress = 1;
|
|
rf_CopybackReconstructedData(raidPtr);
|
|
raidPtr->copyback_in_progress = 0;
|
|
|
|
/* That's all... */
|
|
RF_THREAD_EXIT(0); /* does not return */
|
|
}
|
|
|
|
|
|
void
|
|
rf_ReconstructInPlaceThread(req)
|
|
struct rf_recon_req *req;
|
|
{
|
|
int retcode;
|
|
RF_Raid_t *raidPtr;
|
|
|
|
mtx_lock(&Giant);
|
|
raidPtr = req->raidPtr;
|
|
raidPtr->recon_in_progress = 1;
|
|
retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
|
|
RF_Free(req, sizeof(*req));
|
|
raidPtr->recon_in_progress = 0;
|
|
|
|
/* That's all... */
|
|
RF_THREAD_EXIT(0); /* does not return */
|
|
}
|
|
|
|
RF_AutoConfig_t *
|
|
rf_find_raid_components()
|
|
{
|
|
RF_AutoConfig_t *ac_list = NULL;
|
|
#if 0 /* XXX GEOM */
|
|
struct vnode *vp;
|
|
struct disklabel *label;
|
|
struct diskslice *slice;
|
|
struct diskslices *slices;
|
|
struct disk *disk;
|
|
struct thread *td;
|
|
dev_t dev;
|
|
char *devname;
|
|
int error, j;
|
|
int nslices;
|
|
|
|
td = curthread;
|
|
|
|
MALLOC(label, struct disklabel *, sizeof(struct disklabel),
|
|
M_RAIDFRAME, M_NOWAIT|M_ZERO);
|
|
MALLOC(slices, struct diskslices *, sizeof(struct diskslices),
|
|
M_RAIDFRAME, M_NOWAIT|M_ZERO);
|
|
if ((label == NULL) || (slices == NULL)) {
|
|
printf("rf_find_raid_components: Out of Memory?\n");
|
|
return (NULL);
|
|
}
|
|
|
|
/* initialize the AutoConfig list */
|
|
ac_list = NULL;
|
|
|
|
/* we begin by trolling through *all* the disk devices on the system */
|
|
|
|
disk = NULL;
|
|
while ((disk = disk_enumerate(disk))) {
|
|
|
|
/* we don't care about floppies... */
|
|
devname = disk->d_dev->si_name;
|
|
if (!strncmp(devname, "fd", 2) ||
|
|
!strncmp(devname, "cd", 2) ||
|
|
!strncmp(devname, "acd", 3))
|
|
continue;
|
|
|
|
rf_printf(1, "Examining %s\n", disk->d_dev->si_name);
|
|
if (bdevvp(disk->d_dev, &vp))
|
|
panic("RAIDframe can't alloc vnode");
|
|
vref(vp);
|
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
|
|
error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
|
|
VOP_UNLOCK(vp, 0, td);
|
|
if (error) {
|
|
vput(vp);
|
|
continue;
|
|
}
|
|
|
|
error = VOP_IOCTL(vp, DIOCGSLICEINFO, (caddr_t)slices,
|
|
FREAD, td->td_ucred, td);
|
|
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
|
|
vrele(vp);
|
|
if (error) {
|
|
/* No slice table. */
|
|
continue;
|
|
}
|
|
|
|
nslices = slices->dss_nslices;
|
|
if ((nslices == 0) || (nslices > MAX_SLICES))
|
|
continue;
|
|
|
|
/* Iterate through the slices */
|
|
for (j = 1; j < nslices; j++) {
|
|
|
|
rf_printf(1, "Examining slice %d\n", j);
|
|
slice = &slices->dss_slices[j - 1];
|
|
dev = dkmodslice(disk->d_dev, j);
|
|
if (bdevvp(dev, &vp))
|
|
panic("RAIDframe can't alloc vnode");
|
|
|
|
vref(vp);
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
|
|
error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
|
|
VOP_UNLOCK(vp, 0, td);
|
|
if (error) {
|
|
continue;
|
|
}
|
|
|
|
error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)label,
|
|
FREAD, td->td_ucred, td);
|
|
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
|
|
vrele(vp);
|
|
if (error)
|
|
continue;
|
|
|
|
rf_search_label(dev, label, &ac_list);
|
|
}
|
|
}
|
|
|
|
FREE(label, M_RAIDFRAME);
|
|
FREE(slices, M_RAIDFRAME);
|
|
#endif
|
|
return (ac_list);
|
|
}
|
|
|
|
static void
|
|
rf_search_label(dev_t dev, struct disklabel *label, RF_AutoConfig_t **ac_list)
|
|
{
|
|
RF_AutoConfig_t *ac;
|
|
RF_ComponentLabel_t *clabel;
|
|
struct vnode *vp;
|
|
struct thread *td;
|
|
dev_t dev1;
|
|
int i, error, good_one;
|
|
|
|
td = curthread;
|
|
|
|
/* Iterate through the partitions */
|
|
for (i=0; i < label->d_npartitions; i++) {
|
|
/* We only support partitions marked as RAID */
|
|
if (label->d_partitions[i].p_fstype != FS_RAID)
|
|
continue;
|
|
|
|
dev1 = dkmodpart(dev, i);
|
|
if (dev1 == NULL) {
|
|
rf_printf(1, "dev1 == null\n");
|
|
continue;
|
|
}
|
|
if (bdevvp(dev1, &vp))
|
|
panic("RAIDframe can't alloc vnode");
|
|
|
|
vref(vp);
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
|
|
error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
|
|
VOP_UNLOCK(vp, 0, td);
|
|
if (error) {
|
|
/* Whatever... */
|
|
continue;
|
|
}
|
|
|
|
good_one = 0;
|
|
|
|
clabel = (RF_ComponentLabel_t *)
|
|
malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
|
|
M_NOWAIT);
|
|
if (clabel == NULL) {
|
|
/* XXX CLEANUP HERE */
|
|
panic("RAID autoconfig: no memory!\n");
|
|
}
|
|
|
|
if (!raidread_component_label(dev1, vp, clabel)) {
|
|
/* Got the label. Is it reasonable? */
|
|
if (rf_reasonable_label(clabel) &&
|
|
(clabel->partitionSize <=
|
|
label->d_partitions[i].p_size)) {
|
|
rf_printf(1, "Component on: %s: %d\n",
|
|
dev1->si_name, label->d_partitions[i].p_size);
|
|
rf_print_component_label(clabel);
|
|
/* if it's reasonable, add it, else ignore it */
|
|
ac = (RF_AutoConfig_t *)
|
|
malloc(sizeof(RF_AutoConfig_t),
|
|
M_RAIDFRAME, M_NOWAIT);
|
|
if (ac == NULL) {
|
|
/* XXX should panic? */
|
|
panic("RAID autoconfig: no memory!\n");
|
|
}
|
|
|
|
sprintf(ac->devname, "%s", dev->si_name);
|
|
ac->dev = dev1;
|
|
ac->vp = vp;
|
|
ac->clabel = clabel;
|
|
ac->next = *ac_list;
|
|
*ac_list = ac;
|
|
good_one = 1;
|
|
}
|
|
}
|
|
if (!good_one) {
|
|
/* cleanup */
|
|
free(clabel, M_RAIDFRAME);
|
|
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
|
|
vrele(vp);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
rf_reasonable_label(clabel)
|
|
RF_ComponentLabel_t *clabel;
|
|
{
|
|
|
|
if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
|
|
(clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
|
|
((clabel->clean == RF_RAID_CLEAN) ||
|
|
(clabel->clean == RF_RAID_DIRTY)) &&
|
|
clabel->row >=0 &&
|
|
clabel->column >= 0 &&
|
|
clabel->num_rows > 0 &&
|
|
clabel->num_columns > 0 &&
|
|
clabel->row < clabel->num_rows &&
|
|
clabel->column < clabel->num_columns &&
|
|
clabel->blockSize > 0 &&
|
|
clabel->numBlocks > 0) {
|
|
/* label looks reasonable enough... */
|
|
return(1);
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
|
|
void
|
|
rf_print_component_label(clabel)
|
|
RF_ComponentLabel_t *clabel;
|
|
{
|
|
rf_printf(1, " Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
|
|
clabel->row, clabel->column,
|
|
clabel->num_rows, clabel->num_columns);
|
|
rf_printf(1, " Version: %d Serial Number: %d Mod Counter: %d\n",
|
|
clabel->version, clabel->serial_number,
|
|
clabel->mod_counter);
|
|
rf_printf(1, " Clean: %s Status: %d\n",
|
|
clabel->clean ? "Yes" : "No", clabel->status );
|
|
rf_printf(1, " sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
|
|
clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
|
|
rf_printf(1, " RAID Level: %c blocksize: %d numBlocks: %d\n",
|
|
(char) clabel->parityConfig, clabel->blockSize,
|
|
clabel->numBlocks);
|
|
rf_printf(1, " Autoconfig: %s\n", clabel->autoconfigure ? "Yes":"No");
|
|
rf_printf(1, " Contains root partition: %s\n",
|
|
clabel->root_partition ? "Yes" : "No" );
|
|
rf_printf(1, " Last configured as: raid%d\n", clabel->last_unit );
|
|
#if 0
|
|
rf_printf(1, " Config order: %d\n", clabel->config_order);
|
|
#endif
|
|
|
|
}
|
|
|
|
RF_ConfigSet_t *
|
|
rf_create_auto_sets(ac_list)
|
|
RF_AutoConfig_t *ac_list;
|
|
{
|
|
RF_AutoConfig_t *ac;
|
|
RF_ConfigSet_t *config_sets;
|
|
RF_ConfigSet_t *cset;
|
|
RF_AutoConfig_t *ac_next;
|
|
|
|
|
|
config_sets = NULL;
|
|
|
|
/* Go through the AutoConfig list, and figure out which components
|
|
belong to what sets. */
|
|
ac = ac_list;
|
|
while(ac!=NULL) {
|
|
/* we're going to putz with ac->next, so save it here
|
|
for use at the end of the loop */
|
|
ac_next = ac->next;
|
|
|
|
if (config_sets == NULL) {
|
|
/* will need at least this one... */
|
|
config_sets = (RF_ConfigSet_t *)
|
|
malloc(sizeof(RF_ConfigSet_t),
|
|
M_RAIDFRAME, M_NOWAIT);
|
|
if (config_sets == NULL) {
|
|
panic("rf_create_auto_sets: No memory!\n");
|
|
}
|
|
/* this one is easy :) */
|
|
config_sets->ac = ac;
|
|
config_sets->next = NULL;
|
|
config_sets->rootable = 0;
|
|
ac->next = NULL;
|
|
} else {
|
|
/* which set does this component fit into? */
|
|
cset = config_sets;
|
|
while(cset!=NULL) {
|
|
if (rf_does_it_fit(cset, ac)) {
|
|
/* looks like it matches... */
|
|
ac->next = cset->ac;
|
|
cset->ac = ac;
|
|
break;
|
|
}
|
|
cset = cset->next;
|
|
}
|
|
if (cset==NULL) {
|
|
/* didn't find a match above... new set..*/
|
|
cset = (RF_ConfigSet_t *)
|
|
malloc(sizeof(RF_ConfigSet_t),
|
|
M_RAIDFRAME, M_NOWAIT);
|
|
if (cset == NULL) {
|
|
panic("rf_create_auto_sets: No memory!\n");
|
|
}
|
|
cset->ac = ac;
|
|
ac->next = NULL;
|
|
cset->next = config_sets;
|
|
cset->rootable = 0;
|
|
config_sets = cset;
|
|
}
|
|
}
|
|
ac = ac_next;
|
|
}
|
|
|
|
|
|
return(config_sets);
|
|
}
|
|
|
|
static int
|
|
rf_does_it_fit(cset, ac)
|
|
RF_ConfigSet_t *cset;
|
|
RF_AutoConfig_t *ac;
|
|
{
|
|
RF_ComponentLabel_t *clabel1, *clabel2;
|
|
|
|
/* If this one matches the *first* one in the set, that's good
|
|
enough, since the other members of the set would have been
|
|
through here too... */
|
|
/* note that we are not checking partitionSize here..
|
|
|
|
Note that we are also not checking the mod_counters here.
|
|
If everything else matches execpt the mod_counter, that's
|
|
good enough for this test. We will deal with the mod_counters
|
|
a little later in the autoconfiguration process.
|
|
|
|
(clabel1->mod_counter == clabel2->mod_counter) &&
|
|
|
|
The reason we don't check for this is that failed disks
|
|
will have lower modification counts. If those disks are
|
|
not added to the set they used to belong to, then they will
|
|
form their own set, which may result in 2 different sets,
|
|
for example, competing to be configured at raid0, and
|
|
perhaps competing to be the root filesystem set. If the
|
|
wrong ones get configured, or both attempt to become /,
|
|
weird behaviour and or serious lossage will occur. Thus we
|
|
need to bring them into the fold here, and kick them out at
|
|
a later point.
|
|
|
|
*/
|
|
|
|
clabel1 = cset->ac->clabel;
|
|
clabel2 = ac->clabel;
|
|
if ((clabel1->version == clabel2->version) &&
|
|
(clabel1->serial_number == clabel2->serial_number) &&
|
|
(clabel1->num_rows == clabel2->num_rows) &&
|
|
(clabel1->num_columns == clabel2->num_columns) &&
|
|
(clabel1->sectPerSU == clabel2->sectPerSU) &&
|
|
(clabel1->SUsPerPU == clabel2->SUsPerPU) &&
|
|
(clabel1->SUsPerRU == clabel2->SUsPerRU) &&
|
|
(clabel1->parityConfig == clabel2->parityConfig) &&
|
|
(clabel1->maxOutstanding == clabel2->maxOutstanding) &&
|
|
(clabel1->blockSize == clabel2->blockSize) &&
|
|
(clabel1->numBlocks == clabel2->numBlocks) &&
|
|
(clabel1->autoconfigure == clabel2->autoconfigure) &&
|
|
(clabel1->root_partition == clabel2->root_partition) &&
|
|
(clabel1->last_unit == clabel2->last_unit) &&
|
|
(clabel1->config_order == clabel2->config_order)) {
|
|
/* if it get's here, it almost *has* to be a match */
|
|
} else {
|
|
/* it's not consistent with somebody in the set..
|
|
punt */
|
|
return(0);
|
|
}
|
|
/* all was fine.. it must fit... */
|
|
return(1);
|
|
}
|
|
|
|
int
|
|
rf_have_enough_components(cset)
|
|
RF_ConfigSet_t *cset;
|
|
{
|
|
RF_AutoConfig_t *ac;
|
|
RF_AutoConfig_t *auto_config;
|
|
RF_ComponentLabel_t *clabel;
|
|
int r,c;
|
|
int num_rows;
|
|
int num_cols;
|
|
int num_missing;
|
|
int mod_counter;
|
|
int mod_counter_found;
|
|
int even_pair_failed;
|
|
char parity_type;
|
|
|
|
|
|
/* check to see that we have enough 'live' components
|
|
of this set. If so, we can configure it if necessary */
|
|
|
|
num_rows = cset->ac->clabel->num_rows;
|
|
num_cols = cset->ac->clabel->num_columns;
|
|
parity_type = cset->ac->clabel->parityConfig;
|
|
|
|
/* XXX Check for duplicate components!?!?!? */
|
|
|
|
/* Determine what the mod_counter is supposed to be for this set. */
|
|
|
|
mod_counter_found = 0;
|
|
mod_counter = 0;
|
|
ac = cset->ac;
|
|
while(ac!=NULL) {
|
|
if (mod_counter_found==0) {
|
|
mod_counter = ac->clabel->mod_counter;
|
|
mod_counter_found = 1;
|
|
} else {
|
|
if (ac->clabel->mod_counter > mod_counter) {
|
|
mod_counter = ac->clabel->mod_counter;
|
|
}
|
|
}
|
|
ac = ac->next;
|
|
}
|
|
|
|
num_missing = 0;
|
|
auto_config = cset->ac;
|
|
|
|
for(r=0; r<num_rows; r++) {
|
|
even_pair_failed = 0;
|
|
for(c=0; c<num_cols; c++) {
|
|
ac = auto_config;
|
|
while(ac!=NULL) {
|
|
if ((ac->clabel->row == r) &&
|
|
(ac->clabel->column == c) &&
|
|
(ac->clabel->mod_counter == mod_counter)) {
|
|
/* it's this one... */
|
|
rf_printf(1, "Found: %s at %d,%d\n",
|
|
ac->devname,r,c);
|
|
break;
|
|
}
|
|
ac=ac->next;
|
|
}
|
|
if (ac==NULL) {
|
|
/* Didn't find one here! */
|
|
/* special case for RAID 1, especially
|
|
where there are more than 2
|
|
components (where RAIDframe treats
|
|
things a little differently :( ) */
|
|
if (parity_type == '1') {
|
|
if (c%2 == 0) { /* even component */
|
|
even_pair_failed = 1;
|
|
} else { /* odd component. If
|
|
we're failed, and
|
|
so is the even
|
|
component, it's
|
|
"Good Night, Charlie" */
|
|
if (even_pair_failed == 1) {
|
|
return(0);
|
|
}
|
|
}
|
|
} else {
|
|
/* normal accounting */
|
|
num_missing++;
|
|
}
|
|
}
|
|
if ((parity_type == '1') && (c%2 == 1)) {
|
|
/* Just did an even component, and we didn't
|
|
bail.. reset the even_pair_failed flag,
|
|
and go on to the next component.... */
|
|
even_pair_failed = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
clabel = cset->ac->clabel;
|
|
|
|
if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
|
|
((clabel->parityConfig == '4') && (num_missing > 1)) ||
|
|
((clabel->parityConfig == '5') && (num_missing > 1))) {
|
|
/* XXX this needs to be made *much* more general */
|
|
/* Too many failures */
|
|
return(0);
|
|
}
|
|
/* otherwise, all is well, and we've got enough to take a kick
|
|
at autoconfiguring this set */
|
|
return(1);
|
|
}
|
|
|
|
void
|
|
rf_create_configuration(ac,config,raidPtr)
|
|
RF_AutoConfig_t *ac;
|
|
RF_Config_t *config;
|
|
RF_Raid_t *raidPtr;
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
int i;
|
|
|
|
clabel = ac->clabel;
|
|
|
|
/* 1. Fill in the common stuff */
|
|
config->numRow = clabel->num_rows;
|
|
config->numCol = clabel->num_columns;
|
|
config->numSpare = 0; /* XXX should this be set here? */
|
|
config->sectPerSU = clabel->sectPerSU;
|
|
config->SUsPerPU = clabel->SUsPerPU;
|
|
config->SUsPerRU = clabel->SUsPerRU;
|
|
config->parityConfig = clabel->parityConfig;
|
|
/* XXX... */
|
|
strcpy(config->diskQueueType,"fifo");
|
|
config->maxOutstandingDiskReqs = clabel->maxOutstanding;
|
|
config->layoutSpecificSize = 0; /* XXX ? */
|
|
|
|
while(ac!=NULL) {
|
|
/* row/col values will be in range due to the checks
|
|
in reasonable_label() */
|
|
strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
|
|
ac->devname);
|
|
ac = ac->next;
|
|
}
|
|
|
|
for(i=0;i<RF_MAXDBGV;i++) {
|
|
config->debugVars[i][0] = NULL;
|
|
}
|
|
}
|
|
|
|
int
|
|
rf_set_autoconfig(raidPtr, new_value)
|
|
RF_Raid_t *raidPtr;
|
|
int new_value;
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
struct vnode *vp;
|
|
dev_t dev;
|
|
int row, column;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_WAITOK | M_ZERO);
|
|
|
|
raidPtr->autoconfigure = new_value;
|
|
for(row=0; row<raidPtr->numRow; row++) {
|
|
for(column=0; column<raidPtr->numCol; column++) {
|
|
if (raidPtr->Disks[row][column].status ==
|
|
rf_ds_optimal) {
|
|
dev = raidPtr->Disks[row][column].dev;
|
|
vp = raidPtr->raid_cinfo[row][column].ci_vp;
|
|
raidread_component_label(dev, vp, clabel);
|
|
clabel->autoconfigure = new_value;
|
|
raidwrite_component_label(dev, vp, clabel);
|
|
}
|
|
}
|
|
}
|
|
FREE(clabel, M_RAIDFRAME);
|
|
return(new_value);
|
|
}
|
|
|
|
int
|
|
rf_set_rootpartition(raidPtr, new_value)
|
|
RF_Raid_t *raidPtr;
|
|
int new_value;
|
|
{
|
|
RF_ComponentLabel_t *clabel;
|
|
struct vnode *vp;
|
|
dev_t dev;
|
|
int row, column;
|
|
|
|
MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
|
|
M_RAIDFRAME, M_WAITOK | M_ZERO);
|
|
|
|
raidPtr->root_partition = new_value;
|
|
for(row=0; row<raidPtr->numRow; row++) {
|
|
for(column=0; column<raidPtr->numCol; column++) {
|
|
if (raidPtr->Disks[row][column].status ==
|
|
rf_ds_optimal) {
|
|
dev = raidPtr->Disks[row][column].dev;
|
|
vp = raidPtr->raid_cinfo[row][column].ci_vp;
|
|
raidread_component_label(dev, vp, clabel);
|
|
clabel->root_partition = new_value;
|
|
raidwrite_component_label(dev, vp, clabel);
|
|
}
|
|
}
|
|
}
|
|
FREE(clabel, M_RAIDFRAME);
|
|
return(new_value);
|
|
}
|
|
|
|
void
|
|
rf_release_all_vps(cset)
|
|
RF_ConfigSet_t *cset;
|
|
{
|
|
RF_AutoConfig_t *ac;
|
|
struct thread *td;
|
|
|
|
td = curthread;
|
|
ac = cset->ac;
|
|
while(ac!=NULL) {
|
|
/* Close the vp, and give it back */
|
|
if (ac->vp) {
|
|
VOP_CLOSE(ac->vp, FREAD, td->td_ucred, td);
|
|
vrele(ac->vp);
|
|
ac->vp = NULL;
|
|
}
|
|
ac = ac->next;
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
rf_cleanup_config_set(cset)
|
|
RF_ConfigSet_t *cset;
|
|
{
|
|
RF_AutoConfig_t *ac;
|
|
RF_AutoConfig_t *next_ac;
|
|
|
|
ac = cset->ac;
|
|
while(ac!=NULL) {
|
|
next_ac = ac->next;
|
|
/* nuke the label */
|
|
free(ac->clabel, M_RAIDFRAME);
|
|
/* cleanup the config structure */
|
|
free(ac, M_RAIDFRAME);
|
|
/* "next.." */
|
|
ac = next_ac;
|
|
}
|
|
/* and, finally, nuke the config set */
|
|
free(cset, M_RAIDFRAME);
|
|
}
|
|
|
|
|
|
void
|
|
raid_init_component_label(raidPtr, clabel)
|
|
RF_Raid_t *raidPtr;
|
|
RF_ComponentLabel_t *clabel;
|
|
{
|
|
/* current version number */
|
|
clabel->version = RF_COMPONENT_LABEL_VERSION;
|
|
clabel->serial_number = raidPtr->serial_number;
|
|
clabel->mod_counter = raidPtr->mod_counter;
|
|
clabel->num_rows = raidPtr->numRow;
|
|
clabel->num_columns = raidPtr->numCol;
|
|
clabel->clean = RF_RAID_DIRTY; /* not clean */
|
|
clabel->status = rf_ds_optimal; /* "It's good!" */
|
|
|
|
clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
|
|
clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
|
|
clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
|
|
|
|
clabel->blockSize = raidPtr->bytesPerSector;
|
|
clabel->numBlocks = raidPtr->sectorsPerDisk;
|
|
|
|
/* XXX not portable */
|
|
clabel->parityConfig = raidPtr->Layout.map->parityConfig;
|
|
clabel->maxOutstanding = raidPtr->maxOutstanding;
|
|
clabel->autoconfigure = raidPtr->autoconfigure;
|
|
clabel->root_partition = raidPtr->root_partition;
|
|
clabel->last_unit = raidPtr->raidid;
|
|
clabel->config_order = raidPtr->config_order;
|
|
}
|
|
|
|
int
|
|
rf_auto_config_set(cset, unit, parent_sc)
|
|
RF_ConfigSet_t *cset;
|
|
int *unit;
|
|
struct raidctl_softc *parent_sc;
|
|
{
|
|
int retcode = 0;
|
|
RF_Raid_t *raidPtr;
|
|
RF_Config_t *config;
|
|
int raidID;
|
|
|
|
rf_printf(0, "RAIDframe autoconfigure\n");
|
|
|
|
*unit = -1;
|
|
|
|
/* 1. Create a config structure */
|
|
|
|
config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
|
|
M_NOWAIT|M_ZERO);
|
|
if (config==NULL) {
|
|
rf_printf(0, "Out of mem at rf_auto_config_set\n");
|
|
/* XXX do something more intelligent here. */
|
|
return(1);
|
|
}
|
|
|
|
/* XXX raidID needs to be set correctly.. */
|
|
|
|
/*
|
|
2. Figure out what RAID ID this one is supposed to live at
|
|
See if we can get the same RAID dev that it was configured
|
|
on last time..
|
|
*/
|
|
|
|
raidID = cset->ac->clabel->last_unit;
|
|
if (raidID < 0) {
|
|
/* let's not wander off into lala land. */
|
|
raidID = raidgetunit(parent_sc, 0);
|
|
} else {
|
|
raidID = raidgetunit(parent_sc, raidID);
|
|
}
|
|
|
|
if (raidID < 0) {
|
|
/* punt... */
|
|
rf_printf(0, "Unable to auto configure this set!\n");
|
|
rf_printf(1, "Out of RAID devs!\n");
|
|
return(1);
|
|
}
|
|
rf_printf(0, "Configuring raid%d:\n",raidID);
|
|
RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *));
|
|
if (raidPtr == NULL) {
|
|
rf_printf(0, "Out of mem at rf_auto_config_set\n");
|
|
return (1);
|
|
}
|
|
bzero((char *)raidPtr, sizeof(RF_Raid_t));
|
|
|
|
/* XXX all this stuff should be done SOMEWHERE ELSE! */
|
|
raidPtr->raidid = raidID;
|
|
raidPtr->openings = RAIDOUTSTANDING;
|
|
|
|
/* 3. Build the configuration structure */
|
|
rf_create_configuration(cset->ac, config, raidPtr);
|
|
|
|
/* 4. Do the configuration */
|
|
retcode = rf_Configure(raidPtr, config, cset->ac);
|
|
|
|
if (retcode == 0) {
|
|
|
|
parent_sc->sc_raiddevs[raidID] = raidinit(raidPtr);
|
|
if (parent_sc->sc_raiddevs[raidID] == NULL) {
|
|
rf_printf(0, "Could not create RAID device\n");
|
|
RF_Free(raidPtr, sizeof(RF_Raid_t));
|
|
free(config, M_RAIDFRAME);
|
|
return (1);
|
|
}
|
|
|
|
parent_sc->sc_numraid++;
|
|
((struct raid_softc *)raidPtr->sc)->sc_parent_dev =
|
|
parent_sc->sc_dev;
|
|
rf_markalldirty(raidPtr);
|
|
raidPtr->autoconfigure = 1; /* XXX do this here? */
|
|
if (cset->ac->clabel->root_partition==1) {
|
|
/* everything configured just fine. Make a note
|
|
that this set is eligible to be root. */
|
|
cset->rootable = 1;
|
|
/* XXX do this here? */
|
|
raidPtr->root_partition = 1;
|
|
}
|
|
}
|
|
|
|
/* 5. Cleanup */
|
|
free(config, M_RAIDFRAME);
|
|
|
|
*unit = raidID;
|
|
return(retcode);
|
|
}
|
|
|
|
void
|
|
rf_disk_unbusy(desc)
|
|
RF_RaidAccessDesc_t *desc;
|
|
{
|
|
struct raid_softc *sc;
|
|
struct bio *bp;
|
|
|
|
sc = desc->raidPtr->sc;
|
|
bp = (struct bio *)desc->bp;
|
|
|
|
devstat_end_transaction_bio(&sc->device_stats, bp);
|
|
}
|
|
|
|
/*
|
|
* Get the next available unit number from the bitmap. You can also request
|
|
* a particular unit number by passing it in the second arg. If it's not
|
|
* available, then grab the next free one. Return -1 if none are available.
|
|
*/
|
|
static int
|
|
raidgetunit(struct raidctl_softc *parent_sc, int id)
|
|
{
|
|
int i;
|
|
|
|
if (id >= RF_MAX_ARRAYS)
|
|
return (-1);
|
|
|
|
for (i = id; i < RF_MAX_ARRAYS; i++) {
|
|
if (parent_sc->sc_raiddevs[i] == NULL)
|
|
return (i);
|
|
}
|
|
|
|
if (id != 0) {
|
|
for (i = 0; i < id; i++) {
|
|
if (parent_sc->sc_raiddevs[i] == NULL)
|
|
return (i);
|
|
}
|
|
}
|
|
|
|
return (-1);
|
|
}
|
|
|
|
static int
|
|
raidshutdown(void)
|
|
{
|
|
struct raidctl_softc *parent_sc;
|
|
int i, error = 0;
|
|
|
|
parent_sc = raidctl_dev->si_drv1;
|
|
|
|
if (parent_sc->sc_numraid != 0) {
|
|
#if XXX_KTHREAD_EXIT_RACE
|
|
return (EBUSY);
|
|
#else
|
|
for (i = 0; i < RF_MAX_ARRAYS; i++) {
|
|
if (parent_sc->sc_raiddevs[i] != NULL) {
|
|
rf_printf(0, "Shutting down raid%d\n", i);
|
|
error = raidctlioctl(raidctl_dev,
|
|
RAIDFRAME_SHUTDOWN, (caddr_t)&i, 0, NULL);
|
|
if (error)
|
|
return (error);
|
|
if (parent_sc->sc_numraid == 0)
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
destroy_dev(raidctl_dev);
|
|
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
raid_getcomponentsize(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
|
|
{
|
|
struct vnode *vp;
|
|
struct vattr va;
|
|
RF_Thread_t td;
|
|
off_t mediasize;
|
|
u_int secsize;
|
|
int retcode;
|
|
|
|
td = raidPtr->engine_thread;
|
|
|
|
retcode = raidlookup(raidPtr->Disks[row][col].devname, td, &vp);
|
|
|
|
if (retcode) {
|
|
printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",raidPtr->raidid,
|
|
raidPtr->Disks[row][col].devname, retcode);
|
|
|
|
/* XXX the component isn't responding properly...
|
|
must be still dead :-( */
|
|
raidPtr->reconInProgress--;
|
|
return(retcode);
|
|
|
|
} else {
|
|
|
|
/* Ok, so we can at least do a lookup...
|
|
How about actually getting a vp for it? */
|
|
|
|
if ((retcode = VOP_GETATTR(vp, &va, rf_getucred(td),
|
|
td)) != 0) {
|
|
raidPtr->reconInProgress--;
|
|
return(retcode);
|
|
}
|
|
|
|
retcode = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&secsize,
|
|
FREAD, rf_getucred(td), td);
|
|
if (retcode)
|
|
return (retcode);
|
|
raidPtr->Disks[row][col].blockSize = secsize;
|
|
|
|
retcode = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize,
|
|
FREAD, rf_getucred(td), td);
|
|
if (retcode)
|
|
return (retcode);
|
|
raidPtr->Disks[row][col].numBlocks = mediasize / secsize;
|
|
|
|
raidPtr->raid_cinfo[row][col].ci_vp = vp;
|
|
raidPtr->raid_cinfo[row][col].ci_dev = udev2dev(va.va_rdev, 0);
|
|
raidPtr->Disks[row][col].dev = udev2dev(va.va_rdev, 0);
|
|
|
|
/* we allow the user to specify that only a
|
|
fraction of the disks should be used this is
|
|
just for debug: it speeds up
|
|
* the parity scan */
|
|
raidPtr->Disks[row][col].numBlocks =
|
|
raidPtr->Disks[row][col].numBlocks *
|
|
rf_sizePercentage / 100;
|
|
}
|
|
|
|
return(retcode);
|
|
}
|
|
|
|
static int
|
|
raid_modevent(mod, type, data)
|
|
module_t mod;
|
|
int type;
|
|
void *data;
|
|
{
|
|
int error = 0;
|
|
|
|
switch (type) {
|
|
case MOD_LOAD:
|
|
raidattach();
|
|
break;
|
|
|
|
case MOD_UNLOAD:
|
|
case MOD_SHUTDOWN:
|
|
error = raidshutdown();
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
moduledata_t raid_mod = {
|
|
"raidframe",
|
|
(modeventhand_t) raid_modevent,
|
|
0};
|
|
|
|
DECLARE_MODULE(raidframe, raid_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
|