a246e10f55
field in struct buf: b_iocmd. The b_iocmd is enforced to have exactly one bit set. B_WRITE was bogusly defined as zero giving rise to obvious coding mistakes. Also eliminate the redundant struct buf flag B_CALL, it can just as efficiently be done by comparing b_iodone to NULL. Should you get a panic or drop into the debugger, complaining about "b_iocmd", don't continue. It is likely to write on your disk where it should have been reading. This change is a step in the direction towards a stackable BIO capability. A lot of this patch were machine generated (Thanks to style(9) compliance!) Vinum users: Greg has not had time to test this yet, be careful.
1094 lines
33 KiB
C
1094 lines
33 KiB
C
/*-
|
|
* Copyright (c) 1997, 1998, 1999
|
|
* Nan Yang Computer Services Limited. All rights reserved.
|
|
*
|
|
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
|
*
|
|
* Written by Greg Lehey
|
|
*
|
|
* This software is distributed under the so-called ``Berkeley
|
|
* License'':
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Nan Yang Computer
|
|
* Services Limited.
|
|
* 4. Neither the name of the Company nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* This software is provided ``as is'', and any express or implied
|
|
* warranties, including, but not limited to, the implied warranties of
|
|
* merchantability and fitness for a particular purpose are disclaimed.
|
|
* In no event shall the company or contributors be liable for any
|
|
* direct, indirect, incidental, special, exemplary, or consequential
|
|
* damages (including, but not limited to, procurement of substitute
|
|
* goods or services; loss of use, data, or profits; or business
|
|
* interruption) however caused and on any theory of liability, whether
|
|
* in contract, strict liability, or tort (including negligence or
|
|
* otherwise) arising in any way out of the use of this software, even if
|
|
* advised of the possibility of such damage.
|
|
*
|
|
* $Id: vinumstate.c,v 2.15 2000/01/04 04:39:25 grog Exp grog $
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#include <dev/vinum/vinumhdr.h>
|
|
#include <dev/vinum/request.h>
|
|
|
|
/* Update drive state */
|
|
/* Return 1 if the state changes, otherwise 0 */
|
|
int
|
|
set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
|
|
{
|
|
struct drive *drive = &DRIVE[driveno];
|
|
int oldstate = drive->state;
|
|
int sdno;
|
|
|
|
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
|
return 0;
|
|
|
|
if (newstate == oldstate) /* don't change it if it's not different */
|
|
return 1; /* all OK */
|
|
if ((newstate == drive_down) /* the drive's going down */
|
|
&&(!(flags & setstate_force))
|
|
&& (drive->opencount != 0)) /* we can't do it */
|
|
return 0; /* don't do it */
|
|
drive->state = newstate; /* set the state */
|
|
if (drive->label.name[0] != '\0') /* we have a name, */
|
|
log(LOG_INFO,
|
|
"vinum: drive %s is %s\n",
|
|
drive->label.name,
|
|
drive_state(drive->state));
|
|
if (drive->state != oldstate) { /* state has changed */
|
|
for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
|
|
if ((SD[sdno].state >= sd_referenced)
|
|
&& (SD[sdno].driveno == driveno)) /* belongs to this drive */
|
|
update_sd_state(sdno); /* update the state */
|
|
}
|
|
}
|
|
if (newstate == drive_up) { /* want to bring it up */
|
|
if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */
|
|
init_drive(drive, 1); /* which changes the state again */
|
|
} else /* taking it down or worse */
|
|
queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */
|
|
(union daemoninfo) drive);
|
|
if ((flags & setstate_configuring) == 0) /* configuring? */
|
|
save_config(); /* no: save the updated configuration now */
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Try to set the subdisk state. Return 1 if state changed to
|
|
* what we wanted, -1 if it changed to something else, and 0
|
|
* if no change.
|
|
*
|
|
* This routine is called both from the user (up, down states only)
|
|
* and internally.
|
|
*
|
|
* The setstate_force bit in the flags enables the state change even
|
|
* if it could be dangerous to data consistency. It shouldn't allow
|
|
* nonsense.
|
|
*/
|
|
int
|
|
set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
|
|
{
|
|
struct sd *sd = &SD[sdno];
|
|
struct plex *plex;
|
|
struct volume *vol;
|
|
int oldstate = sd->state;
|
|
int status = 1; /* status to return */
|
|
|
|
if (newstate == oldstate) /* already there, */
|
|
return 1;
|
|
else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
|
return 0; /* can't do it */
|
|
|
|
if (sd->driveoffset < 0) { /* not allocated space */
|
|
sd->state = sd_down;
|
|
if (newstate != sd_down) {
|
|
if (sd->plexno >= 0)
|
|
sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */
|
|
return -1;
|
|
}
|
|
} else { /* space allocated */
|
|
switch (newstate) {
|
|
case sd_down: /* take it down? */
|
|
/*
|
|
* If we're attached to a plex, and we're
|
|
* not reborn, we won't go down without
|
|
* use of force.
|
|
*/
|
|
if ((!flags & setstate_force)
|
|
&& (sd->plexno >= 0)
|
|
&& (sd->state != sd_reborn))
|
|
return 0; /* don't do it */
|
|
break;
|
|
|
|
case sd_initialized:
|
|
if ((sd->state == sd_initializing) /* we were initializing */
|
|
||(flags & setstate_force)) /* or we forced it */
|
|
break;
|
|
return 0; /* can't do it otherwise */
|
|
|
|
case sd_up:
|
|
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
|
return 0; /* not even by force */
|
|
if (flags & setstate_force) /* forcing it, */
|
|
break; /* just do it, and damn the consequences */
|
|
switch (sd->state) {
|
|
/*
|
|
* Perform the necessary tests. To allow
|
|
* the state transition, just break out of
|
|
* the switch.
|
|
*/
|
|
case sd_crashed:
|
|
case sd_reborn:
|
|
case sd_down: /* been down, no data lost */
|
|
/*
|
|
* If we're associated with a plex, and
|
|
* the plex isn't up, or we're the only
|
|
* subdisk in the plex, we can do it.
|
|
*/
|
|
if ((sd->plexno >= 0)
|
|
&& (((PLEX[sd->plexno].state < plex_firstup)
|
|
|| (PLEX[sd->plexno].subdisks > 1))))
|
|
break; /* do it */
|
|
if (oldstate != sd_reborn) {
|
|
sd->state = sd_reborn; /* here it is again */
|
|
log(LOG_INFO,
|
|
"vinum: %s is %s, not %s\n",
|
|
sd->name,
|
|
sd_state(sd->state),
|
|
sd_state(newstate));
|
|
}
|
|
status = -1;
|
|
break;
|
|
|
|
case sd_init: /* brand new */
|
|
if (flags & setstate_configuring) /* we're doing this while configuring */
|
|
break;
|
|
/* otherwise it's like being empty */
|
|
/* FALLTHROUGH */
|
|
|
|
case sd_empty:
|
|
case sd_initialized:
|
|
/*
|
|
* If we're not part of a plex, or the
|
|
* plex is not part of a volume with other
|
|
* plexes which are up, we can come up
|
|
* without being inconsistent.
|
|
*
|
|
* If we're part of a parity plex, we'll
|
|
* come up if the caller uses force. This
|
|
* is the way we bring them up after
|
|
* initialization.
|
|
*/
|
|
if ((sd->plexno < 0)
|
|
|| ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
|
|
|| (isparity((&PLEX[sd->plexno]))
|
|
&& (flags & setstate_force)))
|
|
break;
|
|
|
|
/* Otherwise it's just out of date */
|
|
/* FALLTHROUGH */
|
|
|
|
case sd_stale: /* out of date info, need reviving */
|
|
case sd_obsolete:
|
|
/*
|
|
|
|
* 1. If the subdisk is not part of a
|
|
* plex, bring it up, don't revive.
|
|
*
|
|
* 2. If the subdisk is part of a
|
|
* one-plex volume or an unattached
|
|
* plex, and it's not RAID-4 or
|
|
* RAID-5, we *can't revive*. The
|
|
* subdisk doesn't change its state.
|
|
*
|
|
* 3. If the subdisk is part of a
|
|
* one-plex volume or an unattached
|
|
* plex, and it's RAID-4 or RAID-5,
|
|
* but more than one subdisk is down,
|
|
* we *still can't revive*. The
|
|
* subdisk doesn't change its state.
|
|
*
|
|
* 4. If the subdisk is part of a
|
|
* multi-plex volume, we'll change to
|
|
* reviving and let the revive
|
|
* routines find out whether it will
|
|
* work or not. If they don't, the
|
|
* revive stops with an error message,
|
|
* but the state doesn't change
|
|
* (FWIW).
|
|
*/
|
|
if (sd->plexno < 0) /* no plex associated, */
|
|
break; /* bring it up */
|
|
plex = &PLEX[sd->plexno];
|
|
if (plex->volno >= 0) /* have a volume */
|
|
vol = &VOL[plex->volno];
|
|
else
|
|
vol = NULL;
|
|
/*
|
|
* We can't do it if:
|
|
*
|
|
* 1: we don't have a volume
|
|
* 2: we're the only plex in the volume
|
|
* 3: we're a RAID-4 or RAID-5 plex, and
|
|
* more than one subdisk is down.
|
|
*/
|
|
if (((vol == NULL)
|
|
|| (vol->plexes == 1))
|
|
&& ((!isparity(plex))
|
|
|| (plex->sddowncount > 1))) {
|
|
if (sd->state == sd_initializing) /* it's finished initializing */
|
|
sd->state = sd_initialized;
|
|
else
|
|
return 0; /* can't do it */
|
|
} else {
|
|
sd->state = sd_reviving; /* put in reviving state */
|
|
sd->revived = 0; /* nothing done yet */
|
|
status = EAGAIN; /* need to repeat */
|
|
}
|
|
break;
|
|
|
|
case sd_reviving:
|
|
if (flags & setstate_force) /* insist, */
|
|
break;
|
|
return EAGAIN; /* no, try again */
|
|
|
|
default: /* can't do it */
|
|
/*
|
|
* There's no way to bring subdisks up directly from
|
|
* other states. First they need to be initialized
|
|
* or revived.
|
|
*/
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
default: /* other ones, only internal with force */
|
|
if ((flags & setstate_force) == 0) /* no force? What's this? */
|
|
return 0; /* don't do it */
|
|
}
|
|
}
|
|
if (status == 1) { /* we can do it, */
|
|
sd->state = newstate;
|
|
if (flags & setstate_force)
|
|
log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
|
|
else
|
|
log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
|
|
} else /* we don't get here with status 0 */
|
|
log(LOG_INFO,
|
|
"vinum: %s is %s, not %s\n",
|
|
sd->name,
|
|
sd_state(sd->state),
|
|
sd_state(newstate));
|
|
if (sd->plexno >= 0) /* we belong to a plex */
|
|
update_plex_state(sd->plexno); /* update plex state */
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config();
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* Set the state of a plex dependent on its subdisks.
|
|
* This time round, we'll let plex state just reflect
|
|
* aggregate subdisk state, so this becomes an order of
|
|
* magnitude less complicated. In particular, ignore
|
|
* the requested state.
|
|
*/
|
|
int
|
|
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
|
{
|
|
struct plex *plex; /* point to our plex */
|
|
enum plexstate oldstate;
|
|
enum volplexstate vps; /* how do we compare with the other plexes? */
|
|
|
|
plex = &PLEX[plexno]; /* point to our plex */
|
|
oldstate = plex->state;
|
|
|
|
/* If the plex isn't allocated, we can't do it. */
|
|
if (plex->state == plex_unallocated)
|
|
return 0;
|
|
|
|
/*
|
|
* If it's already in the the state we want,
|
|
* and it's not up, just return. If it's up,
|
|
* we still need to do some housekeeping.
|
|
*/
|
|
if ((state == oldstate)
|
|
&& (state != plex_up))
|
|
return 1;
|
|
vps = vpstate(plex); /* how do we compare with the other plexes? */
|
|
switch (state) {
|
|
/*
|
|
* We can't bring the plex up, even by force,
|
|
* unless it's ready. update_plex_state
|
|
* checks that.
|
|
*/
|
|
case plex_up: /* bring the plex up */
|
|
update_plex_state(plex->plexno); /* it'll come up if it can */
|
|
break;
|
|
|
|
case plex_down: /* want to take it down */
|
|
/*
|
|
* If we're the only one, or the only one
|
|
* which is up, we need force to do it.
|
|
*/
|
|
if (((vps == volplex_onlyus)
|
|
|| (vps == volplex_onlyusup))
|
|
&& (!(flags & setstate_force)))
|
|
return 0; /* can't do it */
|
|
plex->state = state; /* do it */
|
|
invalidate_subdisks(plex, sd_down); /* and down all up subdisks */
|
|
break;
|
|
|
|
/*
|
|
* This is only requested internally.
|
|
* Trust ourselves
|
|
*/
|
|
case plex_faulty:
|
|
plex->state = state; /* do it */
|
|
invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */
|
|
break;
|
|
|
|
case plex_initializing:
|
|
/* XXX consider what safeguards we need here */
|
|
if ((flags & setstate_force) == 0)
|
|
return 0;
|
|
plex->state = state; /* do it */
|
|
break;
|
|
|
|
/* What's this? */
|
|
default:
|
|
return 0;
|
|
}
|
|
if (plex->state != oldstate) /* we've changed, */
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is %s\n",
|
|
plex->name,
|
|
plex_state(plex->state));
|
|
/*
|
|
* Now see what we have left, and whether
|
|
* we're taking the volume down
|
|
*/
|
|
if (plex->volno >= 0) /* we have a volume */
|
|
update_volume_state(plex->volno); /* update its state */
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config(); /* yes: save the updated configuration */
|
|
return 1;
|
|
}
|
|
|
|
/* Update the state of a plex dependent on its plexes. */
|
|
int
|
|
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
|
{
|
|
struct volume *vol = &VOL[volno]; /* point to our volume */
|
|
|
|
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
|
return 0;
|
|
if (vol->state == state) /* we're there already */
|
|
return 1;
|
|
|
|
if (state == volume_up) /* want to come up */
|
|
update_volume_state(volno);
|
|
else if (state == volume_down) { /* want to go down */
|
|
if (((vol->flags & VF_OPEN) == 0) /* not open */
|
|
||((flags & setstate_force) != 0)) { /* or we're forcing */
|
|
vol->state = volume_down;
|
|
log(LOG_INFO,
|
|
"vinum: volume %s is %s\n",
|
|
vol->name,
|
|
volume_state(vol->state));
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config(); /* yes: save the updated configuration */
|
|
return 1;
|
|
}
|
|
}
|
|
return 0; /* no change */
|
|
}
|
|
|
|
/* Set the state of a subdisk based on its environment */
|
|
void
|
|
update_sd_state(int sdno)
|
|
{
|
|
struct sd *sd;
|
|
struct drive *drive;
|
|
enum sdstate oldstate;
|
|
|
|
sd = &SD[sdno];
|
|
oldstate = sd->state;
|
|
drive = &DRIVE[sd->driveno];
|
|
|
|
if (drive->state == drive_up) {
|
|
switch (sd->state) {
|
|
case sd_down:
|
|
case sd_crashed:
|
|
sd->state = sd_reborn; /* back up again with no loss */
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
} else { /* down or worse */
|
|
switch (sd->state) {
|
|
case sd_up:
|
|
case sd_reborn:
|
|
case sd_reviving:
|
|
case sd_empty:
|
|
sd->state = sd_crashed; /* lost our drive */
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (sd->state != oldstate) /* state has changed, */
|
|
log(LOG_INFO, /* say so */
|
|
"vinum: %s is %s\n",
|
|
sd->name,
|
|
sd_state(sd->state));
|
|
if (sd->plexno >= 0) /* we're part of a plex, */
|
|
update_plex_state(sd->plexno); /* update its state */
|
|
}
|
|
|
|
/*
|
|
* Force a plex and all its subdisks
|
|
* into an 'up' state. This is a helper
|
|
* for update_plex_state.
|
|
*/
|
|
void
|
|
forceup(int plexno)
|
|
{
|
|
struct plex *plex;
|
|
int sdno;
|
|
|
|
plex = &PLEX[plexno]; /* point to the plex */
|
|
plex->state = plex_up; /* and bring it up */
|
|
|
|
/* change the subdisks to up state */
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
SD[plex->sdnos[sdno]].state = sd_up;
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is up\n",
|
|
SD[plex->sdnos[sdno]].name);
|
|
}
|
|
}
|
|
|
|
/* Set the state of a plex based on its environment */
|
|
void
|
|
update_plex_state(int plexno)
|
|
{
|
|
struct plex *plex; /* point to our plex */
|
|
enum plexstate oldstate;
|
|
enum sdstates statemap; /* get a map of the subdisk states */
|
|
enum volplexstate vps; /* how do we compare with the other plexes? */
|
|
|
|
plex = &PLEX[plexno]; /* point to our plex */
|
|
oldstate = plex->state;
|
|
statemap = sdstatemap(plex); /* get a map of the subdisk states */
|
|
vps = vpstate(plex); /* how do we compare with the other plexes? */
|
|
|
|
if (statemap & sd_initstate) /* something initializing? */
|
|
plex->state = plex_initializing; /* yup, that makes the plex the same */
|
|
else if (statemap == sd_upstate)
|
|
/*
|
|
* All the subdisks are up. This also means that
|
|
* they are consistent, so we can just bring
|
|
* the plex up
|
|
*/
|
|
plex->state = plex_up;
|
|
else if (isparity(plex) /* RAID-4 or RAID-5 plex */
|
|
&&(plex->sddowncount == 1)) /* and exactly one subdisk down */
|
|
plex->state = plex_degraded; /* limping a bit */
|
|
else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */
|
|
||((statemap & ~sd_downstate)
|
|
== (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
|
|
if ((vps & volplex_otherup) == 0) { /* no other plex is up */
|
|
struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */
|
|
|
|
/*
|
|
* If we're a striped or concat plex
|
|
* associated with a volume, none of whose
|
|
* plexes are up, and we're new and untested,
|
|
* and the volume has the setupstate bit set,
|
|
* we can pretend to be in a consistent state.
|
|
*
|
|
* We need to do this in one swell foop: on
|
|
* the next call we will no longer be just
|
|
* empty.
|
|
*
|
|
* This code assumes that all the other plexes
|
|
* are also capable of coming up (i.e. all the
|
|
* sds are up), but that's OK: we'll come back
|
|
* to this function for the remaining plexes
|
|
* in the volume.
|
|
*/
|
|
if ((plex->state == plex_init)
|
|
&& (plex->volno >= 0)
|
|
&& (vol->flags & VF_CONFIG_SETUPSTATE)) {
|
|
for (plexno = 0; plexno < vol->plexes; plexno++)
|
|
forceup(VOL[plex->volno].plex[plexno]);
|
|
} else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */
|
|
||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */
|
|
||(plex->organization == plex_striped))
|
|
forceup(plexno); /* we'll do it */
|
|
/*
|
|
* This leaves a case where things don't get
|
|
* done: the plex is RAID-4 or RAID-5, and
|
|
* the subdisks are all empty. They need to
|
|
* be initialized first.
|
|
*/
|
|
} else {
|
|
if (statemap == sd_upstate) /* all subdisks up */
|
|
plex->state = plex_up; /* we can come up too */
|
|
else
|
|
plex->state = plex_faulty;
|
|
}
|
|
} else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
|
|
plex->state = plex_flaky;
|
|
else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */
|
|
plex->state = plex_corrupt; /* corrupt */
|
|
else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */
|
|
plex->state = plex_initializing;
|
|
else /* nothing at all up */
|
|
plex->state = plex_faulty;
|
|
|
|
if (plex->state != oldstate) /* state has changed, */
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is %s\n",
|
|
plex->name,
|
|
plex_state(plex->state));
|
|
if (plex->volno >= 0) /* we're part of a volume, */
|
|
update_volume_state(plex->volno); /* update its state */
|
|
}
|
|
|
|
/* Set volume state based on its components */
|
|
void
|
|
update_volume_state(int volno)
|
|
{
|
|
struct volume *vol; /* our volume */
|
|
int plexno;
|
|
enum volumestate oldstate;
|
|
|
|
vol = &VOL[volno]; /* point to our volume */
|
|
oldstate = vol->state;
|
|
|
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
|
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
|
if (plex->state >= plex_corrupt) { /* something accessible, */
|
|
vol->state = volume_up;
|
|
break;
|
|
}
|
|
}
|
|
if (plexno == vol->plexes) /* didn't find an up plex */
|
|
vol->state = volume_down;
|
|
|
|
if (vol->state != oldstate) { /* state changed */
|
|
log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
|
|
save_config(); /* save the updated configuration */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Called from request routines when they find
|
|
* a subdisk which is not kosher. Decide whether
|
|
* it warrants changing the state. Return
|
|
* REQUEST_DOWN if we can't use the subdisk,
|
|
* REQUEST_OK if we can.
|
|
*/
|
|
/*
|
|
* A prior version of this function checked the plex
|
|
* state as well. At the moment, consider plex states
|
|
* information for the user only. We'll ignore them
|
|
* and use the subdisk state only. The last version of
|
|
* this file with the old logic was 2.7. XXX
|
|
*/
|
|
enum requeststatus
|
|
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
|
{
|
|
struct plex *plex = &PLEX[sd->plexno];
|
|
int writeop = (rq->bp->b_iocmd == BIO_WRITE); /* note if we're writing */
|
|
|
|
switch (sd->state) {
|
|
/* We shouldn't get called if the subdisk is up */
|
|
case sd_up:
|
|
return REQUEST_OK;
|
|
|
|
case sd_reviving:
|
|
/*
|
|
* Access to a reviving subdisk depends on the
|
|
* organization of the plex:
|
|
*
|
|
* - If it's concatenated, access the subdisk
|
|
* up to its current revive point. If we
|
|
* want to write to the subdisk overlapping
|
|
* the current revive block, set the
|
|
* conflict flag in the request, asking the
|
|
* caller to put the request on the wait
|
|
* list, which will be attended to by
|
|
* revive_block when it's done.
|
|
* - if it's striped, we can't do it (we could
|
|
* do some hairy calculations, but it's
|
|
* unlikely to work).
|
|
* - if it's RAID-4 or RAID-5, we can do it as
|
|
* long as only one subdisk is down
|
|
*/
|
|
if (plex->state == plex_striped) /* plex is striped, */
|
|
return REQUEST_DOWN;
|
|
|
|
else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */
|
|
if (plex->sddowncount > 1) /* with more than one sd down, */
|
|
return REQUEST_DOWN;
|
|
else
|
|
/*
|
|
* XXX We shouldn't do this if we can find a
|
|
* better way. Check the other plexes
|
|
* first, and return a DOWN if another
|
|
* plex will do it better
|
|
*/
|
|
return REQUEST_OK; /* OK, we'll find a way */
|
|
}
|
|
if (diskaddr > (sd->revived
|
|
+ sd->plexoffset
|
|
+ (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */
|
|
return REQUEST_DOWN;
|
|
else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
|
|
if (writeop) {
|
|
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
|
rq->sdno = sd->sdno; /* and which sd last caused it */
|
|
} else
|
|
return REQUEST_DOWN;
|
|
}
|
|
return REQUEST_OK;
|
|
|
|
case sd_reborn:
|
|
if (writeop)
|
|
return REQUEST_OK; /* always write to a reborn disk */
|
|
else /* don't allow a read */
|
|
/*
|
|
* Handle the mapping. We don't want to reject
|
|
* a read request to a reborn subdisk if that's
|
|
* all we have. XXX
|
|
*/
|
|
return REQUEST_DOWN;
|
|
|
|
case sd_down:
|
|
if (writeop) /* writing to a consistent down disk */
|
|
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
|
return REQUEST_DOWN;
|
|
|
|
case sd_crashed:
|
|
if (writeop) /* writing to a consistent down disk */
|
|
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
|
return REQUEST_DOWN;
|
|
|
|
default:
|
|
return REQUEST_DOWN;
|
|
}
|
|
}
|
|
|
|
/* return a state map for the subdisks of a plex */
|
|
enum sdstates
|
|
sdstatemap(struct plex *plex)
|
|
{
|
|
int sdno;
|
|
enum sdstates statemap = 0; /* note the states we find */
|
|
|
|
plex->sddowncount = 0; /* no subdisks down yet */
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
|
|
|
switch (sd->state) {
|
|
case sd_empty:
|
|
statemap |= sd_emptystate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_init:
|
|
statemap |= sd_initstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_down:
|
|
statemap |= sd_downstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_crashed:
|
|
statemap |= sd_crashedstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_obsolete:
|
|
statemap |= sd_obsoletestate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_stale:
|
|
statemap |= sd_stalestate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_reborn:
|
|
statemap |= sd_rebornstate;
|
|
break;
|
|
|
|
case sd_up:
|
|
statemap |= sd_upstate;
|
|
break;
|
|
|
|
case sd_initializing:
|
|
statemap |= sd_initstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_initialized:
|
|
statemap |= sd_initializedstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_unallocated:
|
|
case sd_uninit:
|
|
case sd_reviving:
|
|
case sd_referenced:
|
|
statemap |= sd_otherstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
}
|
|
}
|
|
return statemap;
|
|
}
|
|
|
|
/* determine the state of the volume relative to this plex */
|
|
enum volplexstate
|
|
vpstate(struct plex *plex)
|
|
{
|
|
struct volume *vol;
|
|
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
|
int plexno;
|
|
|
|
if (plex->volno < 0) { /* not associated with a volume */
|
|
if (plex->state > plex_degraded)
|
|
return volplex_onlyus; /* just us */
|
|
else
|
|
return volplex_onlyusdown; /* assume the worst */
|
|
}
|
|
vol = &VOL[plex->volno]; /* point to our volume */
|
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
|
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
|
if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
|
|
state |= volplex_onlyus; /* yes */
|
|
} else {
|
|
if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
|
|
state |= volplex_otherup; /* and when they were up, they were up */
|
|
else
|
|
state |= volplex_alldown; /* and when they were down, they were down */
|
|
}
|
|
}
|
|
return state; /* and when they were only halfway up */
|
|
} /* they were neither up nor down */
|
|
|
|
/* Check if all bits b are set in a */
|
|
int allset(int a, int b);
|
|
|
|
int
|
|
allset(int a, int b)
|
|
{
|
|
return (a & b) == b;
|
|
}
|
|
|
|
/* Invalidate the subdisks belonging to a plex */
|
|
void
|
|
invalidate_subdisks(struct plex *plex, enum sdstate state)
|
|
{
|
|
int sdno;
|
|
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
|
struct sd *sd = &SD[plex->sdnos[sdno]];
|
|
|
|
switch (sd->state) {
|
|
case sd_unallocated:
|
|
case sd_uninit:
|
|
case sd_init:
|
|
case sd_initializing:
|
|
case sd_initialized:
|
|
case sd_empty:
|
|
case sd_obsolete:
|
|
case sd_stale:
|
|
case sd_crashed:
|
|
case sd_down:
|
|
case sd_referenced:
|
|
break;
|
|
|
|
case sd_reviving:
|
|
case sd_reborn:
|
|
case sd_up:
|
|
set_sd_state(plex->sdnos[sdno], state, setstate_force);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Start an object, in other words do what we can to get it up.
|
|
* This is called from vinumioctl (VINUMSTART).
|
|
* Return error indications via ioctl_reply
|
|
*/
|
|
void
|
|
start_object(struct vinum_ioctl_msg *data)
|
|
{
|
|
int status;
|
|
int objindex = data->index; /* data gets overwritten */
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
|
enum setstateflags flags;
|
|
|
|
if (data->force != 0) /* are we going to use force? */
|
|
flags = setstate_force; /* yes */
|
|
else
|
|
flags = setstate_none; /* no */
|
|
|
|
switch (data->type) {
|
|
case drive_object:
|
|
status = set_drive_state(objindex, drive_up, flags);
|
|
if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case sd_object:
|
|
if (DRIVE[SD[objindex].driveno].state != drive_up) {
|
|
ioctl_reply->error = EIO;
|
|
strcpy(ioctl_reply->msg, "Drive is down");
|
|
return;
|
|
}
|
|
if ((SD[objindex].state == sd_reviving) /* reviving, */
|
|
||(SD[objindex].state == sd_stale)) { /* or stale, will revive */
|
|
SD[objindex].state = sd_reviving; /* make sure we're reviving */
|
|
if (data->blocksize)
|
|
SD[objindex].revive_blocksize = data->blocksize;
|
|
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
|
ioctl_reply->msg[0] = '\0'; /* no comment */
|
|
return;
|
|
} else if (SD[objindex].state == sd_initializing) { /* initializing, */
|
|
if (data->blocksize)
|
|
SD[objindex].init_blocksize = data->blocksize;
|
|
ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
|
|
ioctl_reply->msg[0] = '\0'; /* no comment */
|
|
return;
|
|
}
|
|
status = set_sd_state(objindex, sd_up, flags); /* set state */
|
|
if (status != EAGAIN) { /* not first revive or initialize, */
|
|
if (SD[objindex].state != sd_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
} else
|
|
ioctl_reply->error = status;
|
|
break;
|
|
|
|
case plex_object:
|
|
status = set_plex_state(objindex, plex_up, flags);
|
|
if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case volume_object:
|
|
status = set_volume_state(objindex, volume_up, flags);
|
|
if (VOL[objindex].state != volume_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
default:
|
|
ioctl_reply->error = EINVAL;
|
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
|
return;
|
|
}
|
|
/*
|
|
* There's no point in saying anything here:
|
|
* the userland program does it better
|
|
*/
|
|
ioctl_reply->msg[0] = '\0';
|
|
}
|
|
|
|
/*
|
|
* Stop an object, in other words do what we can to get it down
|
|
* This is called from vinumioctl (VINUMSTOP).
|
|
* Return error indications via ioctl_reply.
|
|
*/
|
|
void
|
|
stop_object(struct vinum_ioctl_msg *data)
|
|
{
|
|
int status = 1;
|
|
int objindex = data->index; /* save the number from change */
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
|
|
|
switch (data->type) {
|
|
case drive_object:
|
|
status = set_drive_state(objindex, drive_down, data->force);
|
|
break;
|
|
|
|
case sd_object:
|
|
status = set_sd_state(objindex, sd_down, data->force);
|
|
break;
|
|
|
|
case plex_object:
|
|
status = set_plex_state(objindex, plex_down, data->force);
|
|
break;
|
|
|
|
case volume_object:
|
|
status = set_volume_state(objindex, volume_down, data->force);
|
|
break;
|
|
|
|
default:
|
|
ioctl_reply->error = EINVAL;
|
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
|
return;
|
|
}
|
|
ioctl_reply->msg[0] = '\0';
|
|
if (status == 0) /* couldn't do it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
}
|
|
|
|
/*
|
|
* VINUM_SETSTATE ioctl: set an object state.
|
|
* msg is the message passed by the user.
|
|
*/
|
|
void
|
|
setstate(struct vinum_ioctl_msg *msg)
|
|
{
|
|
int sdno;
|
|
struct sd *sd;
|
|
struct plex *plex;
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
|
|
|
switch (msg->state) {
|
|
case object_down:
|
|
stop_object(msg);
|
|
break;
|
|
|
|
case object_initializing:
|
|
switch (msg->type) {
|
|
case sd_object:
|
|
sd = &SD[msg->index];
|
|
if ((msg->index >= vinum_conf.subdisks_allocated)
|
|
|| (sd->state <= sd_referenced)) {
|
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_sd_state(msg->index, sd_initializing, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case plex_object:
|
|
plex = &PLEX[msg->index];
|
|
if ((msg->index >= vinum_conf.plexes_allocated)
|
|
|| (plex->state <= plex_unallocated)) {
|
|
sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_plex_state(msg->index, plex_initializing, msg->force);
|
|
if (plex->state != plex_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else {
|
|
ioctl_reply->error = 0;
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
sd = &SD[plex->sdnos[sdno]];
|
|
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
strcpy(ioctl_reply->msg, "Invalid object");
|
|
ioctl_reply->error = EINVAL;
|
|
}
|
|
break;
|
|
|
|
case object_initialized:
|
|
if (msg->type == sd_object) {
|
|
sd = &SD[msg->index];
|
|
if ((msg->index >= vinum_conf.subdisks_allocated)
|
|
|| (sd->state <= sd_referenced)) {
|
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_sd_state(msg->index, sd_initialized, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else
|
|
ioctl_reply->error = 0;
|
|
} else {
|
|
strcpy(ioctl_reply->msg, "Invalid object");
|
|
ioctl_reply->error = EINVAL;
|
|
}
|
|
break;
|
|
|
|
case object_up:
|
|
start_object(msg);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Brute force set state function. Don't look at
|
|
* any dependencies, just do it. This is mainly
|
|
* intended for testing and recovery.
|
|
*/
|
|
void
|
|
setstate_by_force(struct vinum_ioctl_msg *msg)
|
|
{
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
|
|
|
switch (msg->type) {
|
|
case drive_object:
|
|
DRIVE[msg->index].state = msg->state;
|
|
break;
|
|
|
|
case sd_object:
|
|
SD[msg->index].state = msg->state;
|
|
break;
|
|
|
|
case plex_object:
|
|
PLEX[msg->index].state = msg->state;
|
|
break;
|
|
|
|
case volume_object:
|
|
VOL[msg->index].state = msg->state;
|
|
break;
|
|
|
|
default:
|
|
}
|
|
ioctl_reply->error = 0;
|
|
}
|
|
/* Local Variables: */
|
|
/* fill-column: 50 */
|
|
/* End: */
|