1f6c52a059
goes into initialized state, not 'up'. This makes it easier to ensure consistency in multi-plex volumes. update_plex_state: redo transitions from empty and initialized subdisks to up or reviving, depending on the number of plexes. Reported-by: Bernd Walter <ticso@cicely.de> Remy Nonnenmacher <remy@synx.com>
1042 lines
32 KiB
C
1042 lines
32 KiB
C
/*-
|
|
* Copyright (c) 1997, 1998, 1999
|
|
* Nan Yang Computer Services Limited. All rights reserved.
|
|
*
|
|
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
|
*
|
|
* Written by Greg Lehey
|
|
*
|
|
* This software is distributed under the so-called ``Berkeley
|
|
* License'':
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Nan Yang Computer
|
|
* Services Limited.
|
|
* 4. Neither the name of the Company nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* This software is provided ``as is'', and any express or implied
|
|
* warranties, including, but not limited to, the implied warranties of
|
|
* merchantability and fitness for a particular purpose are disclaimed.
|
|
* In no event shall the company or contributors be liable for any
|
|
* direct, indirect, incidental, special, exemplary, or consequential
|
|
* damages (including, but not limited to, procurement of substitute
|
|
* goods or services; loss of use, data, or profits; or business
|
|
* interruption) however caused and on any theory of liability, whether
|
|
* in contract, strict liability, or tort (including negligence or
|
|
* otherwise) arising in any way out of the use of this software, even if
|
|
* advised of the possibility of such damage.
|
|
*
|
|
* $Id: vinumstate.c,v 2.12 1999/07/05 01:36:48 grog Exp grog $
|
|
*/
|
|
|
|
#include <dev/vinum/vinumhdr.h>
|
|
#include <dev/vinum/request.h>
|
|
|
|
/* Update drive state */
|
|
/* Return 1 if the state changes, otherwise 0 */
|
|
int
|
|
set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
|
|
{
|
|
struct drive *drive = &DRIVE[driveno];
|
|
int oldstate = drive->state;
|
|
int sdno;
|
|
|
|
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
|
return 0;
|
|
|
|
if (newstate == oldstate) /* don't change it if it's not different */
|
|
return 1; /* all OK */
|
|
if ((newstate == drive_down) /* the drive's going down */
|
|
&&(!(flags & setstate_force))
|
|
&& (drive->opencount != 0)) /* we can't do it */
|
|
return 0; /* don't do it */
|
|
drive->state = newstate; /* set the state */
|
|
if (drive->label.name[0] != '\0') /* we have a name, */
|
|
log(LOG_INFO,
|
|
"vinum: drive %s is %s\n",
|
|
drive->label.name,
|
|
drive_state(drive->state));
|
|
if ((drive->state == drive_up)
|
|
&& (drive->vp == NULL)) /* should be open, but we're not */
|
|
init_drive(drive, 1); /* which changes the state again */
|
|
if (newstate < drive_up) /* drive going down, */
|
|
queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */
|
|
(union daemoninfo) drive);
|
|
if (newstate != oldstate) { /* state has changed */
|
|
for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
|
|
if ((SD[sdno].state >= sd_referenced)
|
|
&& (SD[sdno].driveno == driveno)) /* belongs to this drive */
|
|
update_sd_state(sdno); /* update the state */
|
|
}
|
|
}
|
|
if ((flags & setstate_configuring) == 0) /* configuring? */
|
|
save_config(); /* no: save the updated configuration now */
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Try to set the subdisk state. Return 1 if state changed to
|
|
* what we wanted, -1 if it changed to something else, and 0
|
|
* if no change.
|
|
*
|
|
* This routine is called both from the user (up, down states only)
|
|
* and internally.
|
|
*
|
|
* The setstate_force bit in the flags enables the state change even
|
|
* if it could be dangerous to data consistency. It shouldn't allow
|
|
* nonsense.
|
|
*/
|
|
int
|
|
set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
|
|
{
|
|
struct sd *sd = &SD[sdno];
|
|
struct plex *plex;
|
|
struct volume *vol;
|
|
int oldstate = sd->state;
|
|
int status = 1; /* status to return */
|
|
|
|
if (newstate == oldstate) /* already there, */
|
|
return 1;
|
|
else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
|
return 0; /* can't do it */
|
|
|
|
if (sd->driveoffset < 0) { /* not allocated space */
|
|
sd->state = sd_down;
|
|
if (newstate != sd_down) {
|
|
if (sd->plexno >= 0)
|
|
sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */
|
|
return -1;
|
|
}
|
|
} else { /* space allocated */
|
|
switch (newstate) {
|
|
case sd_down: /* take it down? */
|
|
/*
|
|
* If we're attached to a plex, and we're
|
|
* not reborn, we won't go down without
|
|
* use of force.
|
|
*/
|
|
if ((!flags & setstate_force)
|
|
&& (sd->plexno >= 0)
|
|
&& (sd->state != sd_reborn))
|
|
return 0; /* don't do it */
|
|
break;
|
|
|
|
case sd_initialized:
|
|
if ((sd->state == sd_initializing) /* we were initializing */
|
|
||(flags & setstate_force)) /* or we forced it */
|
|
break;
|
|
return 0; /* can't do it otherwise */
|
|
|
|
case sd_up:
|
|
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
|
return 0; /* not even by force */
|
|
if (flags & setstate_force) /* forcing it, */
|
|
break; /* just do it, and damn the consequences */
|
|
switch (sd->state) {
|
|
case sd_crashed:
|
|
case sd_reborn:
|
|
case sd_down: /* been down, no data lost */
|
|
/*
|
|
* If we're associated with a plex, and
|
|
* the plex isn't up, or we're the only
|
|
* subdisk in the plex, we can do it.
|
|
*/
|
|
if ((sd->plexno >= 0)
|
|
&& (((PLEX[sd->plexno].state < plex_firstup)
|
|
|| (PLEX[sd->plexno].subdisks > 1))))
|
|
break; /* do it */
|
|
if (oldstate != sd_reborn) {
|
|
sd->state = sd_reborn; /* here it is again */
|
|
log(LOG_INFO,
|
|
"vinum: subdisk %s is %s, not %s\n",
|
|
sd->name,
|
|
sd_state(sd->state),
|
|
sd_state(newstate));
|
|
}
|
|
status = -1;
|
|
break;
|
|
|
|
case sd_init: /* brand new */
|
|
if (flags & setstate_configuring) /* we're doing this while configuring */
|
|
break;
|
|
/* otherwise it's like being empty */
|
|
/* FALLTHROUGH */
|
|
|
|
case sd_empty:
|
|
/*
|
|
* If we're associated with a plex which is down, or which is
|
|
* the only one in the volume, and we're not a RAID-5 plex, we
|
|
* can come up without being inconsistent. Internally, we use
|
|
* the force flag to bring up a RAID-5 plex after
|
|
* initialization.
|
|
*/
|
|
if ((sd->plexno >= 0)
|
|
&& ((PLEX[sd->plexno].organization != plex_raid5)
|
|
|| (flags & setstate_force))
|
|
&& (((PLEX[sd->plexno].state < plex_firstup)
|
|
|| (PLEX[sd->plexno].subdisks > 1))))
|
|
break;
|
|
/* Otherwise it's just out of date */
|
|
/* FALLTHROUGH */
|
|
|
|
case sd_stale: /* out of date info, need reviving */
|
|
case sd_obsolete:
|
|
/*
|
|
* 1. If the subdisk is not part of a plex, bring it up, don't revive.
|
|
*
|
|
* 2. If the subdisk is part of a one-plex volume or an unattached plex,
|
|
* and it's not RAID-5, we *can't revive*. The subdisk doesn't
|
|
* change its state.
|
|
*
|
|
* 3. If the subdisk is part of a one-plex volume or an unattached plex,
|
|
* and it's RAID-5, but more than one subdisk is down, we *still
|
|
* can't revive*. The subdisk doesn't change its state.
|
|
*
|
|
* 4. If the subdisk is part of a multi-plex volume, we'll change to
|
|
* reviving and let the revive routines find out whether it will work
|
|
* or not. If they don't, the revive stops with an error message,
|
|
* but the state doesn't change (FWIW).
|
|
*/
|
|
if (sd->plexno < 0) /* no plex associated, */
|
|
break; /* bring it up */
|
|
plex = &PLEX[sd->plexno];
|
|
if (plex->volno >= 0) /* have a volume */
|
|
vol = &VOL[plex->volno];
|
|
else
|
|
vol = NULL;
|
|
/*
|
|
* We can't do it if:
|
|
*
|
|
* 1: we don't have a volume
|
|
* 2: we're the only plex in the volume
|
|
* 3: we're a RAID-5 plex, and more than one subdisk is down.
|
|
*/
|
|
if (((vol == NULL)
|
|
|| (vol->plexes == 1))
|
|
&& ((plex->organization != plex_raid5)
|
|
|| (plex->sddowncount > 1))) {
|
|
if (sd->state == sd_initializing) /* it's finished initializing */
|
|
sd->state = sd_initialized;
|
|
else
|
|
return 0; /* can't do it */
|
|
} else {
|
|
sd->state = sd_reviving; /* put in reviving state */
|
|
sd->revived = 0; /* nothing done yet */
|
|
status = EAGAIN; /* need to repeat */
|
|
}
|
|
break;
|
|
|
|
case sd_reviving:
|
|
if (flags & setstate_force) /* insist, */
|
|
break;
|
|
return EAGAIN; /* no, try again */
|
|
|
|
default: /* can't do it */
|
|
/*
|
|
* There's no way to bring subdisks up directly from
|
|
* other states. First they need to be initialized
|
|
* or revived.
|
|
*/
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
default: /* other ones, only internal with force */
|
|
if ((flags & setstate_force) == 0) /* no force? What's this? */
|
|
return 0; /* don't do it */
|
|
}
|
|
}
|
|
if (status == 1) { /* we can do it, */
|
|
sd->state = newstate;
|
|
if (flags & setstate_force)
|
|
log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
|
|
else
|
|
log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
|
|
} else /* we don't get here with status 0 */
|
|
log(LOG_INFO,
|
|
"vinum: %s is %s, not %s\n",
|
|
sd->name,
|
|
sd_state(sd->state),
|
|
sd_state(newstate));
|
|
if (sd->plexno >= 0) /* we belong to a plex */
|
|
update_plex_state(sd->plexno); /* update plex state */
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config();
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* Set the state of a plex dependent on its subdisks.
|
|
* This time round, we'll let plex state just reflect
|
|
* aggregate subdisk state, so this becomes an order of
|
|
* magnitude less complicated. In particular, ignore
|
|
* the requested state.
|
|
*/
|
|
int
|
|
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
|
{
|
|
struct plex *plex; /* point to our plex */
|
|
enum plexstate oldstate;
|
|
enum volplexstate vps; /* how do we compare with the other plexes? */
|
|
|
|
plex = &PLEX[plexno]; /* point to our plex */
|
|
oldstate = plex->state;
|
|
|
|
/* If the plex isn't allocated, we can't do it. */
|
|
if (plex->state == plex_unallocated)
|
|
return 0;
|
|
|
|
/*
|
|
* If it's already in the the state we want,
|
|
* and it's not up, just return. If it's up,
|
|
* we still need to do some housekeeping.
|
|
*/
|
|
if ((state == oldstate)
|
|
&& (state != plex_up))
|
|
return 1;
|
|
vps = vpstate(plex); /* how do we compare with the other plexes? */
|
|
switch (state) {
|
|
/*
|
|
* We can't bring the plex up, even by force,
|
|
* unless it's ready. update_plex_state
|
|
* checks that
|
|
*/
|
|
case plex_up: /* bring the plex up */
|
|
update_plex_state(plex->plexno); /* it'll come up if it can */
|
|
break;
|
|
|
|
case plex_down: /* want to take it down */
|
|
/*
|
|
* If we're the only one, or the only one
|
|
* which is up, we need force to do it.
|
|
*/
|
|
if (((vps == volplex_onlyus)
|
|
|| (vps == volplex_onlyusup))
|
|
&& (!(flags & setstate_force)))
|
|
return 0; /* can't do it */
|
|
plex->state = state; /* do it */
|
|
invalidate_subdisks(plex, sd_down); /* and down all up subdisks */
|
|
break;
|
|
|
|
/*
|
|
* This is only requested internally.
|
|
* Trust ourselves
|
|
*/
|
|
case plex_faulty:
|
|
plex->state = state; /* do it */
|
|
invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */
|
|
break;
|
|
|
|
case plex_initializing:
|
|
/* XXX consider what safeguards we need here */
|
|
if ((flags & setstate_force) == 0)
|
|
return 0;
|
|
plex->state = state; /* do it */
|
|
break;
|
|
|
|
/* What's this? */
|
|
default:
|
|
return 0;
|
|
}
|
|
if (plex->state != oldstate) /* we've changed, */
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is %s\n",
|
|
plex->name,
|
|
plex_state(plex->state));
|
|
/*
|
|
* Now see what we have left, and whether
|
|
* we're taking the volume down
|
|
*/
|
|
if (plex->volno >= 0) /* we have a volume */
|
|
update_volume_state(plex->volno); /* update its state */
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config(); /* yes: save the updated configuration */
|
|
return 1;
|
|
}
|
|
|
|
/* Update the state of a plex dependent on its plexes. */
|
|
int
|
|
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
|
{
|
|
struct volume *vol = &VOL[volno]; /* point to our volume */
|
|
|
|
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
|
return 0;
|
|
if (vol->state == state) /* we're there already */
|
|
return 1;
|
|
|
|
if (state == volume_up) /* want to come up */
|
|
update_volume_state(volno);
|
|
else if (state == volume_down) { /* want to go down */
|
|
if (((vol->flags & VF_OPEN) == 0) /* not open */
|
|
||((flags & setstate_force) != 0)) { /* or we're forcing */
|
|
vol->state = volume_down;
|
|
log(LOG_INFO,
|
|
"vinum: volume %s is %s\n",
|
|
vol->name,
|
|
volume_state(vol->state));
|
|
if ((flags & setstate_configuring) == 0) /* save config now */
|
|
save_config(); /* yes: save the updated configuration */
|
|
return 1;
|
|
}
|
|
}
|
|
return 0; /* no change */
|
|
}
|
|
|
|
/* Set the state of a subdisk based on its environment */
|
|
void
|
|
update_sd_state(int sdno)
|
|
{
|
|
struct sd *sd;
|
|
struct drive *drive;
|
|
enum sdstate oldstate;
|
|
|
|
sd = &SD[sdno];
|
|
oldstate = sd->state;
|
|
drive = &DRIVE[sd->driveno];
|
|
|
|
if (drive->state == drive_up) {
|
|
switch (sd->state) {
|
|
case sd_down:
|
|
case sd_crashed:
|
|
sd->state = sd_reborn; /* back up again with no loss */
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
} else { /* down or worse */
|
|
switch (sd->state) {
|
|
case sd_up:
|
|
case sd_reborn:
|
|
case sd_reviving:
|
|
case sd_empty:
|
|
sd->state = sd_crashed; /* lost our drive */
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (sd->state != oldstate) /* state has changed, */
|
|
log(LOG_INFO, /* say so */
|
|
"vinum: %s is %s\n",
|
|
sd->name,
|
|
sd_state(sd->state));
|
|
if (sd->plexno >= 0) /* we're part of a plex, */
|
|
update_plex_state(sd->plexno); /* update its state */
|
|
}
|
|
|
|
/*
|
|
* Force a plex and all its subdisks
|
|
* into an 'up' state. This is a helper
|
|
* for update_plex_state.
|
|
*/
|
|
void
|
|
forceup(int plexno)
|
|
{
|
|
struct plex *plex;
|
|
int sdno;
|
|
|
|
plex = &PLEX[plexno]; /* point to the plex */
|
|
plex->state = plex_up; /* and bring it up */
|
|
|
|
/* change the subdisks to up state */
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
SD[plex->sdnos[sdno]].state = sd_up;
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is up\n",
|
|
SD[plex->sdnos[sdno]].name);
|
|
}
|
|
}
|
|
|
|
/* Set the state of a plex based on its environment */
|
|
void
|
|
update_plex_state(int plexno)
|
|
{
|
|
struct plex *plex; /* point to our plex */
|
|
enum plexstate oldstate;
|
|
enum sdstates statemap; /* get a map of the subdisk states */
|
|
enum volplexstate vps; /* how do we compare with the other plexes? */
|
|
|
|
plex = &PLEX[plexno]; /* point to our plex */
|
|
oldstate = plex->state;
|
|
statemap = sdstatemap(plex); /* get a map of the subdisk states */
|
|
vps = vpstate(plex); /* how do we compare with the other plexes? */
|
|
|
|
if (statemap & sd_initstate) /* something initializing? */
|
|
plex->state = plex_initializing; /* yup, that makes the plex the same */
|
|
if ((statemap == sd_emptystate) /* all subdisks empty */
|
|
||(statemap == sd_initializedstate)) { /* or all initialized */
|
|
if ((vps & volplex_otherup) == 0) { /* no other plex is up */
|
|
struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */
|
|
|
|
/*
|
|
* If we're a striped or concat plex
|
|
* associated with a volume, none of whose
|
|
* plexes are up, and we're new and untested,
|
|
* and the volume has the setupstate bit set,
|
|
* we can pretend to be in a consistent state.
|
|
*
|
|
* We need to do this in one swell foop: on
|
|
* the next call we will no longer be just
|
|
* empty.
|
|
*
|
|
* This code assumes that all the other plexes
|
|
* are also capable of coming up (i.e. all the
|
|
* sds are up), but that's OK: we'll come back
|
|
* to this function for the remaining plexes
|
|
* in the volume.
|
|
*/
|
|
if ((plex->state == plex_init)
|
|
&& (plex->volno >= 0)
|
|
&& (vol->flags & VF_CONFIG_SETUPSTATE)) {
|
|
for (plexno = 0; plexno < vol->plexes; plexno++)
|
|
forceup(VOL[plex->volno].plex[plexno]);
|
|
} else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */
|
|
||(plex->organization == plex_concat) /* and we're not RAID-5 */
|
|
||(plex->organization == plex_striped))
|
|
forceup(plexno); /* we'll do it */
|
|
/*
|
|
* This leaves a case where things don't get
|
|
* done: the plex is RAID-5, and the subdisks
|
|
* are all empty. They need to be initialized
|
|
* first.
|
|
*/
|
|
} else { /* another plex is up */
|
|
int sdno;
|
|
|
|
plex->state = plex_faulty; /* and bring it up */
|
|
/* change the subdisks to up state */
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
SD[plex->sdnos[sdno]].state = sd_reviving;
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is reviving\n",
|
|
SD[plex->sdnos[sdno]].name);
|
|
}
|
|
}
|
|
} else if (statemap == sd_upstate)
|
|
/*
|
|
* All the subdisks are up. This also means that
|
|
* they are consistent, so we can just bring
|
|
* the plex up
|
|
*/
|
|
plex->state = plex_up;
|
|
else if ((plex->organization == plex_raid5) /* raid 5 plex */
|
|
&&(plex->sddowncount == 1)) /* and exactly one subdisk down */
|
|
plex->state = plex_degraded; /* limping a bit */
|
|
else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
|
|
plex->state = plex_flaky;
|
|
else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */
|
|
plex->state = plex_corrupt; /* corrupt */
|
|
else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */
|
|
plex->state = plex_initializing;
|
|
else /* nothing at all up */
|
|
plex->state = plex_faulty;
|
|
|
|
if (plex->state != oldstate) /* state has changed, */
|
|
log(LOG_INFO, /* tell them about it */
|
|
"vinum: %s is %s\n",
|
|
plex->name,
|
|
plex_state(plex->state));
|
|
if (plex->volno >= 0) /* we're part of a volume, */
|
|
update_volume_state(plex->volno); /* update its state */
|
|
}
|
|
|
|
/* Set volume state based on its components */
|
|
void
|
|
update_volume_state(int volno)
|
|
{
|
|
struct volume *vol; /* our volume */
|
|
int plexno;
|
|
enum volumestate oldstate;
|
|
|
|
vol = &VOL[volno]; /* point to our volume */
|
|
oldstate = vol->state;
|
|
|
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
|
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
|
if (plex->state >= plex_corrupt) { /* something accessible, */
|
|
vol->state = volume_up;
|
|
break;
|
|
}
|
|
}
|
|
if (plexno == vol->plexes) /* didn't find an up plex */
|
|
vol->state = volume_down;
|
|
|
|
if (vol->state != oldstate) { /* state changed */
|
|
log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
|
|
save_config(); /* save the updated configuration */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Helper for checksdstate. If this is a write
|
|
* operation, it's no necessarily the end of the
|
|
* world that we can't write: there could be
|
|
* another plex which can satisfy the operation.
|
|
* We must write everything we can, though, so we
|
|
* don't want to stop when we hit a subdisk which
|
|
* is down. Return a separate indication instead.
|
|
*/
|
|
enum requeststatus
|
|
sddownstate(struct request *rq)
|
|
{
|
|
if (rq->bp->b_flags & B_READ) /* read operation? */
|
|
return REQUEST_DOWN; /* OK, can't do it */
|
|
else
|
|
return REQUEST_DEGRADED;
|
|
}
|
|
|
|
/*
|
|
* Called from request routines when they find
|
|
* a subdisk which is not kosher. Decide whether
|
|
* it warrants changing the state. Return
|
|
* REQUEST_DOWN if we can't use the subdisk,
|
|
* REQUEST_OK if we can.
|
|
*/
|
|
/*
|
|
* A prior version of this function checked the plex
|
|
* state as well. At the moment, consider plex states
|
|
* information for the user only. We'll ignore them
|
|
* and use the subdisk state only. The last version of
|
|
* this file with the old logic was 2.7. XXX
|
|
*/
|
|
enum requeststatus
|
|
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
|
{
|
|
struct plex *plex = &PLEX[sd->plexno];
|
|
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
|
|
|
|
switch (sd->state) {
|
|
/* We shouldn't get called if the subdisk is up */
|
|
case sd_up:
|
|
return REQUEST_OK;
|
|
|
|
case sd_reviving:
|
|
/*
|
|
* Access to a reviving subdisk depends on the
|
|
* organization of the plex:
|
|
|
|
* - If it's concatenated, access the subdisk up to its current
|
|
* revive point. If we want to write to the subdisk overlapping the
|
|
* current revive block, set the conflict flag in the request, asking
|
|
* the caller to put the request on the wait list, which will be
|
|
* attended to by revive_block when it's done.
|
|
* - if it's striped, we can't do it (we could do some hairy
|
|
* calculations, but it's unlikely to work).
|
|
* - if it's RAID-5, we can do it as long as only one
|
|
* subdisk is down
|
|
*/
|
|
if (plex->state == plex_striped) /* plex is striped, */
|
|
return sddownstate(rq);
|
|
|
|
else if (plex->state == plex_raid5) { /* RAID5 plex */
|
|
if (plex->sddowncount > 1) /* with more than one sd down, */
|
|
return sddownstate(rq);
|
|
else
|
|
/*
|
|
* XXX We shouldn't do this if we can find a
|
|
* better way. Check the other plexes
|
|
* first, and return a DOWN if another
|
|
* plex will do it better
|
|
*/
|
|
return REQUEST_OK; /* OK, we'll find a way */
|
|
}
|
|
if (diskaddr > (sd->revived
|
|
+ sd->plexoffset
|
|
+ (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */
|
|
return sddownstate(rq);
|
|
else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
|
|
if (writeop) {
|
|
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
|
rq->sdno = sd->sdno; /* and which sd last caused it */
|
|
} else
|
|
return sddownstate(rq);
|
|
}
|
|
return REQUEST_OK;
|
|
|
|
case sd_reborn:
|
|
if (writeop)
|
|
return REQUEST_OK; /* always write to a reborn disk */
|
|
else /* don't allow a read */
|
|
/*
|
|
* Handle the mapping. We don't want to reject
|
|
* a read request to a reborn subdisk if that's
|
|
* all we have. XXX
|
|
*/
|
|
return sddownstate(rq);
|
|
|
|
case sd_down:
|
|
if (writeop) /* writing to a consistent down disk */
|
|
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
|
return sddownstate(rq);
|
|
|
|
case sd_crashed:
|
|
if (writeop) /* writing to a consistent down disk */
|
|
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
|
return sddownstate(rq);
|
|
|
|
default:
|
|
return sddownstate(rq);
|
|
}
|
|
}
|
|
|
|
/* return a state map for the subdisks of a plex */
|
|
enum sdstates
|
|
sdstatemap(struct plex *plex)
|
|
{
|
|
int sdno;
|
|
enum sdstates statemap = 0; /* note the states we find */
|
|
|
|
plex->sddowncount = 0; /* no subdisks down yet */
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
|
|
|
switch (sd->state) {
|
|
case sd_empty:
|
|
statemap |= sd_emptystate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_init:
|
|
statemap |= sd_initstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_down:
|
|
statemap |= sd_downstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_crashed:
|
|
statemap |= sd_crashedstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_obsolete:
|
|
statemap |= sd_obsolete;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_stale:
|
|
statemap |= sd_stalestate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_reborn:
|
|
statemap |= sd_rebornstate;
|
|
break;
|
|
|
|
case sd_up:
|
|
statemap |= sd_upstate;
|
|
break;
|
|
|
|
case sd_initializing:
|
|
statemap |= sd_initstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_initialized:
|
|
statemap |= sd_initializedstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
break;
|
|
|
|
case sd_unallocated:
|
|
case sd_uninit:
|
|
case sd_reviving:
|
|
case sd_referenced:
|
|
statemap |= sd_otherstate;
|
|
(plex->sddowncount)++; /* another unusable subdisk */
|
|
}
|
|
}
|
|
return statemap;
|
|
}
|
|
|
|
/* determine the state of the volume relative to this plex */
|
|
enum volplexstate
|
|
vpstate(struct plex *plex)
|
|
{
|
|
struct volume *vol;
|
|
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
|
int plexno;
|
|
|
|
if (plex->volno < 0) /* not associated with a volume */
|
|
return volplex_onlyusdown; /* assume the worst */
|
|
|
|
vol = &VOL[plex->volno]; /* point to our volume */
|
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
|
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
|
if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
|
|
state |= volplex_onlyus; /* yes */
|
|
} else {
|
|
if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
|
|
state |= volplex_otherup; /* and when they were up, they were up */
|
|
else
|
|
state |= volplex_alldown; /* and when they were down, they were down */
|
|
}
|
|
}
|
|
return state; /* and when they were only halfway up */
|
|
} /* they were neither up nor down */
|
|
|
|
/* Check if all bits b are set in a */
|
|
int allset(int a, int b);
|
|
|
|
int
|
|
allset(int a, int b)
|
|
{
|
|
return (a & b) == b;
|
|
}
|
|
|
|
/* Invalidate the subdisks belonging to a plex */
|
|
void
|
|
invalidate_subdisks(struct plex *plex, enum sdstate state)
|
|
{
|
|
int sdno;
|
|
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
|
struct sd *sd = &SD[plex->sdnos[sdno]];
|
|
|
|
switch (sd->state) {
|
|
case sd_unallocated:
|
|
case sd_uninit:
|
|
case sd_init:
|
|
case sd_initializing:
|
|
case sd_initialized:
|
|
case sd_empty:
|
|
case sd_obsolete:
|
|
case sd_stale:
|
|
case sd_crashed:
|
|
case sd_down:
|
|
case sd_referenced:
|
|
break;
|
|
|
|
case sd_reviving:
|
|
case sd_reborn:
|
|
case sd_up:
|
|
set_sd_state(plex->sdnos[sdno], state, setstate_force);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Start an object, in other words do what we can to get it up.
|
|
* This is called from vinumioctl (VINUMSTART).
|
|
* Return error indications via ioctl_reply
|
|
*/
|
|
void
|
|
start_object(struct vinum_ioctl_msg *data)
|
|
{
|
|
int status;
|
|
int objindex = data->index; /* data gets overwritten */
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
|
enum setstateflags flags;
|
|
|
|
if (data->force != 0) /* are we going to use force? */
|
|
flags = setstate_force; /* yes */
|
|
else
|
|
flags = setstate_none; /* no */
|
|
|
|
switch (data->type) {
|
|
case drive_object:
|
|
status = set_drive_state(objindex, drive_up, flags);
|
|
if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case sd_object:
|
|
if (SD[objindex].state == sd_reviving) { /* reviving, */
|
|
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
|
ioctl_reply->msg[0] = '\0'; /* no comment */
|
|
return;
|
|
}
|
|
status = set_sd_state(objindex, sd_up, flags); /* set state */
|
|
if (status == EAGAIN) { /* first revive, */
|
|
ioctl_reply->error = revive_block(objindex); /* revive the first block */
|
|
ioctl_reply->error = EAGAIN;
|
|
} else {
|
|
if (SD[objindex].state != sd_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
}
|
|
break;
|
|
|
|
case plex_object:
|
|
status = set_plex_state(objindex, plex_up, flags);
|
|
if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case volume_object:
|
|
status = set_volume_state(objindex, volume_up, flags);
|
|
if (VOL[objindex].state != volume_up) /* set status on whether we really did it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
default:
|
|
ioctl_reply->error = EINVAL;
|
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
|
return;
|
|
}
|
|
/*
|
|
* There's no point in saying anything here:
|
|
* the userland program does it better
|
|
*/
|
|
ioctl_reply->msg[0] = '\0';
|
|
}
|
|
|
|
/*
|
|
* Stop an object, in other words do what we can to get it down
|
|
* This is called from vinumioctl (VINUMSTOP).
|
|
* Return error indications via ioctl_reply.
|
|
*/
|
|
void
|
|
stop_object(struct vinum_ioctl_msg *data)
|
|
{
|
|
int status = 1;
|
|
int objindex = data->index; /* save the number from change */
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
|
|
|
switch (data->type) {
|
|
case drive_object:
|
|
status = set_drive_state(objindex, drive_down, data->force);
|
|
break;
|
|
|
|
case sd_object:
|
|
status = set_sd_state(objindex, sd_down, data->force);
|
|
break;
|
|
|
|
case plex_object:
|
|
status = set_plex_state(objindex, plex_down, data->force);
|
|
break;
|
|
|
|
case volume_object:
|
|
status = set_volume_state(objindex, volume_down, data->force);
|
|
break;
|
|
|
|
default:
|
|
ioctl_reply->error = EINVAL;
|
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
|
return;
|
|
}
|
|
ioctl_reply->msg[0] = '\0';
|
|
if (status == 0) /* couldn't do it */
|
|
ioctl_reply->error = EBUSY;
|
|
else
|
|
ioctl_reply->error = 0;
|
|
}
|
|
|
|
/*
|
|
* VINUM_SETSTATE ioctl: set an object state
|
|
* msg is the message passed by the user
|
|
*/
|
|
void
|
|
setstate(struct vinum_ioctl_msg *msg)
|
|
{
|
|
int sdno;
|
|
struct sd *sd;
|
|
struct plex *plex;
|
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
|
|
|
switch (msg->state) {
|
|
case object_down:
|
|
stop_object(msg);
|
|
break;
|
|
|
|
case object_initializing:
|
|
switch (msg->type) {
|
|
case sd_object:
|
|
sd = &SD[msg->index];
|
|
if ((msg->index >= vinum_conf.subdisks_allocated)
|
|
|| (sd->state <= sd_referenced)) {
|
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_sd_state(msg->index, sd_initializing, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else
|
|
ioctl_reply->error = 0;
|
|
break;
|
|
|
|
case plex_object:
|
|
plex = &PLEX[msg->index];
|
|
if ((msg->index >= vinum_conf.plexes_allocated)
|
|
|| (plex->state <= plex_unallocated)) {
|
|
sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_plex_state(msg->index, plex_initializing, msg->force);
|
|
if (plex->state != plex_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else {
|
|
ioctl_reply->error = 0;
|
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
|
sd = &SD[plex->sdnos[sdno]];
|
|
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
strcpy(ioctl_reply->msg, "Invalid object");
|
|
ioctl_reply->error = EINVAL;
|
|
}
|
|
break;
|
|
|
|
case object_initialized:
|
|
if (msg->type == sd_object) {
|
|
sd = &SD[msg->index];
|
|
if ((msg->index >= vinum_conf.subdisks_allocated)
|
|
|| (sd->state <= sd_referenced)) {
|
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
|
ioctl_reply->error = EFAULT;
|
|
return;
|
|
}
|
|
set_sd_state(msg->index, sd_initialized, msg->force);
|
|
if (sd->state != sd_initializing) {
|
|
strcpy(ioctl_reply->msg, "Can't set state");
|
|
ioctl_reply->error = EBUSY;
|
|
} else
|
|
ioctl_reply->error = 0;
|
|
} else {
|
|
strcpy(ioctl_reply->msg, "Invalid object");
|
|
ioctl_reply->error = EINVAL;
|
|
}
|
|
break;
|
|
|
|
case object_up:
|
|
start_object(msg);
|
|
}
|
|
}
|
|
/* Local Variables: */
|
|
/* fill-column: 50 */
|
|
/* End: */
|