800 lines
27 KiB
C
800 lines
27 KiB
C
|
/*-
|
||
|
* Copyright (c) 1997, 1998
|
||
|
* Nan Yang Computer Services Limited. All rights reserved.
|
||
|
*
|
||
|
* This software is distributed under the so-called ``Berkeley
|
||
|
* License'':
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions
|
||
|
* are met:
|
||
|
* 1. Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution.
|
||
|
* 3. All advertising materials mentioning features or use of this software
|
||
|
* must display the following acknowledgement:
|
||
|
* This product includes software developed by Nan Yang Computer
|
||
|
* Services Limited.
|
||
|
* 4. Neither the name of the Company nor the names of its contributors
|
||
|
* may be used to endorse or promote products derived from this software
|
||
|
* without specific prior written permission.
|
||
|
*
|
||
|
* This software is provided ``as is'', and any express or implied
|
||
|
* warranties, including, but not limited to, the implied warranties of
|
||
|
* merchantability and fitness for a particular purpose are disclaimed.
|
||
|
* In no event shall the company or contributors be liable for any
|
||
|
* direct, indirect, incidental, special, exemplary, or consequential
|
||
|
* damages (including, but not limited to, procurement of substitute
|
||
|
* goods or services; loss of use, data, or profits; or business
|
||
|
* interruption) however caused and on any theory of liability, whether
|
||
|
* in contract, strict liability, or tort (including negligence or
|
||
|
* otherwise) arising in any way out of the use of this software, even if
|
||
|
* advised of the possibility of such damage.
|
||
|
*
|
||
|
* $Id: state.c,v 1.3 1998/11/02 04:10:45 grog Exp $
|
||
|
*/
|
||
|
|
||
|
#define REALLYKERNEL
|
||
|
#include "vinumhdr.h"
|
||
|
#include "request.h"
|
||
|
|
||
|
/* Update drive state */
|
||
|
/* Return 1 if the state changes, otherwise 0 */
|
||
|
int
|
||
|
set_drive_state(int driveno, enum drivestate state, int flags)
|
||
|
{
|
||
|
struct drive *drive = &DRIVE[driveno];
|
||
|
int oldstate = drive->state;
|
||
|
int sdno;
|
||
|
|
||
|
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
||
|
return 0;
|
||
|
|
||
|
if (state != oldstate) { /* don't change it if it's not different */
|
||
|
if (state == drive_down) { /* the drive's going down */
|
||
|
if ((flags & setstate_force) || (drive->opencount == 0)) { /* we can do it */
|
||
|
/* We can't call close() from an interrupt
|
||
|
* context. Instead, we do it when we
|
||
|
* next call strategy(). This will change
|
||
|
* when the vinum daemon comes onto the scene */
|
||
|
if (!(flags & setstate_noupdate)) /* we can close it */
|
||
|
close_drive(drive);
|
||
|
} else
|
||
|
return 0; /* don't do it */
|
||
|
}
|
||
|
drive->state = state; /* set the state */
|
||
|
printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
|
||
|
if (((drive->state == drive_up)
|
||
|
|| ((drive->state == drive_coming_up)))
|
||
|
&& (drive->vp == NULL)) /* should be open, but we're not */
|
||
|
init_drive(drive); /* which changes the state again */
|
||
|
if ((state != oldstate) /* state has changed */
|
||
|
&&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */
|
||
|
for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */
|
||
|
if (SD[sdno].driveno == driveno) /* belongs to this drive */
|
||
|
set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */
|
||
|
}
|
||
|
}
|
||
|
if (flags & setstate_noupdate) /* don't update now, */
|
||
|
vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
|
||
|
else
|
||
|
save_config(); /* yes: save the updated configuration */
|
||
|
return 1;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* Try to set the subdisk state. Return 1 if state changed to
|
||
|
* what we wanted, -1 if it changed to something else, and 0
|
||
|
* if no change.
|
||
|
*
|
||
|
* This routine is called both from the user (up, down states
|
||
|
* only) and internally.
|
||
|
*/
|
||
|
int
|
||
|
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
|
||
|
{
|
||
|
struct sd *sd = &SD[sdno];
|
||
|
int oldstate = sd->state;
|
||
|
int status = 1; /* status to return */
|
||
|
|
||
|
if (state == oldstate)
|
||
|
return 0; /* no change */
|
||
|
|
||
|
if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
||
|
return 0;
|
||
|
|
||
|
if (sd->driveoffset < 0) { /* not allocated space */
|
||
|
sd->state = sd_down;
|
||
|
if (state != sd_down)
|
||
|
return -1;
|
||
|
} else { /* space allocated */
|
||
|
switch (state) {
|
||
|
case sd_down:
|
||
|
if ((!flags & setstate_force) /* but gently */
|
||
|
&&(sd->plexno >= 0)) /* and we're attached to a plex, */
|
||
|
return 0; /* don't do it */
|
||
|
break;
|
||
|
|
||
|
case sd_up:
|
||
|
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
||
|
return 0; /* not even by force */
|
||
|
switch (sd->state) {
|
||
|
case sd_crashed:
|
||
|
case sd_down: /* been down, no data lost */
|
||
|
if ((sd->plexno) /* we're associated with a plex */
|
||
|
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||
|
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||
|
break;
|
||
|
/* XXX Get this right: make sure that other plexes in
|
||
|
* the volume cover this address space, otherwise
|
||
|
* we make this one sd_up */
|
||
|
sd->state = sd_reborn; /* here it is again */
|
||
|
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||
|
status = -1;
|
||
|
break;
|
||
|
|
||
|
case sd_init: /* brand new */
|
||
|
if (flags & setstate_configuring) /* we're doing this while configuring */
|
||
|
break;
|
||
|
sd->state = sd_empty; /* nothing in it */
|
||
|
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||
|
status = -1;
|
||
|
break;
|
||
|
|
||
|
case sd_initializing:
|
||
|
break; /* go on and do it */
|
||
|
|
||
|
case sd_empty:
|
||
|
if ((sd->plexno) /* we're associated with a plex */
|
||
|
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||
|
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||
|
break;
|
||
|
return 0; /* can't do it */
|
||
|
|
||
|
default: /* can't do it */
|
||
|
/* There's no way to bring subdisks up directly from
|
||
|
* other states. First they need to be initialized
|
||
|
* or revived */
|
||
|
return 0;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default: /* other ones, only internal with force */
|
||
|
if (flags & setstate_force == 0) /* no force? What's this? */
|
||
|
return 0; /* don't do it */
|
||
|
}
|
||
|
}
|
||
|
sd->state = state;
|
||
|
printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
|
||
|
if ((flags & setstate_norecurse) == 0)
|
||
|
set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
|
||
|
if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
|
||
|
if (setstate_noupdate) /* we can't update now, */
|
||
|
vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
|
||
|
else
|
||
|
save_config();
|
||
|
}
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
/* Called from request routines when they find
|
||
|
* a subdisk which is not kosher. Decide whether
|
||
|
* it warrants changing the state. Return
|
||
|
* REQUEST_DOWN if we can't use the subdisk,
|
||
|
* REQUEST_OK if we can. */
|
||
|
enum requeststatus
|
||
|
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
||
|
{
|
||
|
struct plex *plex = &PLEX[sd->plexno];
|
||
|
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
|
||
|
|
||
|
/* first, see if the plex wants to be accessed */
|
||
|
switch (plex->state) {
|
||
|
case plex_reviving:
|
||
|
/* When writing, we'll write anything that starts
|
||
|
* up to the current revive pointer, but we'll
|
||
|
* only accept a read which finishes before the
|
||
|
* current revive pointer.
|
||
|
*/
|
||
|
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
|
||
|
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
|
||
|
if (writeop) { /* writing to a consistent down disk */
|
||
|
if (DRIVE[sd->driveno].state == drive_up)
|
||
|
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||
|
else
|
||
|
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||
|
}
|
||
|
return REQUEST_DOWN; /* that part of the plex is still down */
|
||
|
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
|
||
|
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
||
|
/* FALLTHROUGH */
|
||
|
|
||
|
case plex_up:
|
||
|
case plex_degraded:
|
||
|
case plex_flaky:
|
||
|
/* We can access the plex: let's see
|
||
|
* how the subdisk feels */
|
||
|
switch (sd->state) {
|
||
|
case sd_up:
|
||
|
return REQUEST_OK;
|
||
|
|
||
|
case sd_reborn:
|
||
|
if (writeop)
|
||
|
return REQUEST_OK; /* always write to a reborn disk */
|
||
|
/* Handle the mapping. We don't want to reject
|
||
|
* a read request to a reborn subdisk if that's
|
||
|
* all we have. XXX */
|
||
|
return REQUEST_DOWN;
|
||
|
|
||
|
case sd_down:
|
||
|
case sd_crashed:
|
||
|
if (writeop) { /* writing to a consistent down disk */
|
||
|
if (DRIVE[sd->driveno].state == drive_up)
|
||
|
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||
|
else
|
||
|
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||
|
}
|
||
|
return REQUEST_DOWN; /* and it's down one way or another */
|
||
|
|
||
|
default:
|
||
|
return REQUEST_DOWN;
|
||
|
}
|
||
|
|
||
|
default:
|
||
|
return REQUEST_DOWN;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
add_defective_region(struct plex *plex, off_t offset, size_t length)
|
||
|
{
|
||
|
/* XXX get this ordered, and coalesce regions if necessary */
|
||
|
if (++plex->defective_regions > plex->defective_region_count)
|
||
|
EXPAND(plex->defective_region,
|
||
|
struct plexregion,
|
||
|
plex->defective_region_count,
|
||
|
PLEX_REGION_TABLE_SIZE);
|
||
|
plex->defective_region[plex->defective_regions - 1].offset = offset;
|
||
|
plex->defective_region[plex->defective_regions - 1].length = length;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
|
||
|
{
|
||
|
if (++plex->unmapped_regions > plex->unmapped_region_count)
|
||
|
EXPAND(plex->unmapped_region,
|
||
|
struct plexregion,
|
||
|
plex->unmapped_region_count,
|
||
|
PLEX_REGION_TABLE_SIZE);
|
||
|
plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
|
||
|
plex->unmapped_region[plex->unmapped_regions - 1].length = length;
|
||
|
}
|
||
|
|
||
|
/* Rebuild a plex free list and set state if
|
||
|
* we have a configuration error */
|
||
|
void
|
||
|
rebuild_plex_unmappedlist(struct plex *plex)
|
||
|
{
|
||
|
int sdno;
|
||
|
struct sd *sd;
|
||
|
int lastsdend = 0; /* end offset of last subdisk */
|
||
|
|
||
|
if (plex->unmapped_region != NULL) { /* we're going to rebuild it */
|
||
|
Free(plex->unmapped_region);
|
||
|
plex->unmapped_region = NULL;
|
||
|
plex->unmapped_regions = 0;
|
||
|
plex->unmapped_region_count = 0;
|
||
|
}
|
||
|
if (plex->defective_region != NULL) {
|
||
|
Free(plex->defective_region);
|
||
|
plex->defective_region = NULL;
|
||
|
plex->defective_regions = 0;
|
||
|
plex->defective_region_count = 0;
|
||
|
}
|
||
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||
|
sd = &SD[plex->sdnos[sdno]];
|
||
|
if (sd->plexoffset < lastsdend) { /* overlap */
|
||
|
printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
|
||
|
set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
|
||
|
} else if (sd->plexoffset > lastsdend) /* gap */
|
||
|
add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
|
||
|
else if (sd->state < sd_reborn) /* this part defective */
|
||
|
add_defective_region(plex, sd->plexoffset, sd->sectors);
|
||
|
lastsdend = sd->plexoffset + sd->sectors;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* return a state map for the subdisks of a plex */
|
||
|
enum sdstates
|
||
|
sdstatemap(struct plex *plex, int *sddowncount)
|
||
|
{
|
||
|
int sdno;
|
||
|
enum sdstates statemap = 0; /* note the states we find */
|
||
|
|
||
|
*sddowncount = 0; /* no subdisks down yet */
|
||
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||
|
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
||
|
|
||
|
switch (sd->state) {
|
||
|
case sd_empty:
|
||
|
statemap |= sd_emptystate;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_init:
|
||
|
statemap |= sd_initstate;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_down:
|
||
|
statemap |= sd_downstate;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_crashed:
|
||
|
statemap |= sd_crashedstate;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_obsolete:
|
||
|
statemap |= sd_obsolete;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_stale:
|
||
|
statemap |= sd_stalestate;
|
||
|
(*sddowncount)++; /* another unusable subdisk */
|
||
|
break;
|
||
|
|
||
|
case sd_reborn:
|
||
|
statemap |= sd_rebornstate;
|
||
|
break;
|
||
|
|
||
|
case sd_up:
|
||
|
statemap |= sd_upstate;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
statemap |= sd_otherstate;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return statemap;
|
||
|
}
|
||
|
|
||
|
/* determine the state of the volume relative to this plex */
|
||
|
enum volplexstate
|
||
|
vpstate(struct plex *plex)
|
||
|
{
|
||
|
struct volume *vol;
|
||
|
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
||
|
int plexno;
|
||
|
|
||
|
if (plex->volno < 0) /* not associated with a volume */
|
||
|
return volplex_onlyusdown; /* assume the worst */
|
||
|
|
||
|
vol = &VOL[plex->volno]; /* point to our volume */
|
||
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||
|
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
||
|
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
|
||
|
state |= volplex_onlyus; /* yes */
|
||
|
} else {
|
||
|
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
|
||
|
state |= volplex_otherup; /* and when they were up, they were up */
|
||
|
else
|
||
|
state |= volplex_alldown; /* and when they were down, they were down */
|
||
|
}
|
||
|
}
|
||
|
return state; /* and when they were only halfway up */
|
||
|
} /* they were neither up nor down */
|
||
|
|
||
|
/*
 * Return 1 if every bit which is set in b is also set in a,
 * otherwise 0.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    return (b & ~a) == 0;                                   /* no bit of b is missing from a */
}
|
||
|
|
||
|
/* Update the state of a plex dependent on its subdisks.
|
||
|
* Also rebuild the unmapped_region and defective_region table */
|
||
|
int
|
||
|
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
||
|
{
|
||
|
int sddowncount = 0; /* number of down subdisks */
|
||
|
struct plex *plex = &PLEX[plexno]; /* point to our plex */
|
||
|
enum plexstate oldstate = plex->state;
|
||
|
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
|
||
|
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
|
||
|
|
||
|
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
|
||
|
return 0; /* no change */
|
||
|
|
||
|
if (plex->state == plex_unallocated) /* no plex to do anything with, */
|
||
|
return 0;
|
||
|
|
||
|
switch (state) {
|
||
|
case plex_up:
|
||
|
if ((plex->state == plex_initializing) /* we're initializing */
|
||
|
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
|
||
|
return 0; /* do nothing */
|
||
|
|
||
|
/* We don't really care what our state was before
|
||
|
* if we want to come up. We rely entirely on the
|
||
|
* state of our subdisks and our volume */
|
||
|
switch (vps) {
|
||
|
case volplex_onlyusdown:
|
||
|
case volplex_alldown: /* another plex is down, and so are we */
|
||
|
if (statemap == sd_upstate) { /* all subdisks ready for action */
|
||
|
if ((plex->state == plex_init) /* we're brand spanking new */
|
||
|
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
|
||
|
/* Conceptually, an empty plex does not contain valid data,
|
||
|
* but normally we'll see this state when we have just
|
||
|
* created a plex, and it's either consistent from earlier,
|
||
|
* or we don't care about the previous contents (we're going
|
||
|
* to create a file system or use it for swap).
|
||
|
*
|
||
|
* We need to do this in one swell foop: on the next call
|
||
|
* we will no longer be just empty.
|
||
|
*
|
||
|
* We'll still come back to this function for the remaining
|
||
|
* plexes in the volume. They'll be up already, so that
|
||
|
* doesn't change anything, but it's not worth the additional
|
||
|
* code to stop doing it. */
|
||
|
struct volume *vol = &VOL[plex->volno];
|
||
|
int plexno;
|
||
|
|
||
|
for (plexno = 0; plexno < vol->plexes; plexno++)
|
||
|
PLEX[vol->plex[plexno]].state = plex_up;
|
||
|
}
|
||
|
plex->state = plex_up; /* bring up up, anyway */
|
||
|
} else
|
||
|
plex->state = plex_down;
|
||
|
break;
|
||
|
|
||
|
case volplex_onlyusup: /* only we are up: others are down */
|
||
|
case volplex_onlyus: /* we're up and alone */
|
||
|
if ((statemap == sd_upstate) /* subdisks all up */
|
||
|
||(statemap == sd_emptystate)) /* or all empty */
|
||
|
plex->state = plex_up; /* go for it */
|
||
|
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||
|
plex->state = plex_flaky;
|
||
|
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
|
||
|
plex->state = plex_degraded; /* so far no corruption */
|
||
|
else
|
||
|
plex->state = plex_faulty;
|
||
|
break;
|
||
|
|
||
|
case volplex_otherup: /* another plex is up */
|
||
|
case volplex_otherupdown: /* other plexes are up and down */
|
||
|
{
|
||
|
int sdno;
|
||
|
struct sd *sd;
|
||
|
|
||
|
/* Is the data in all subdisks valid? */
|
||
|
/* XXX At the moment, subdisks make false
|
||
|
* claims about their validity. Replace this
|
||
|
* when they tell the truth */
|
||
|
/* No: we have invalid or down subdisks */
|
||
|
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* look at these subdisks more carefully */
|
||
|
set_sd_state(plex->sdnos[sdno], /* try to get it up */
|
||
|
sd_up,
|
||
|
setstate_norecurse | setstate_noupdate);
|
||
|
sd = &SD[plex->sdnos[sdno]]; /* point to subdisk */
|
||
|
/* we can make a stale subdisk up here, because
|
||
|
* we're in the process of bringing it up.
|
||
|
* This wouldn't work in set_sd_state, because
|
||
|
* it would allow bypassing the revive */
|
||
|
if (((sd->state == sd_stale)
|
||
|
|| (sd->state == sd_obsolete))
|
||
|
&& (DRIVE[sd->driveno].state == drive_up))
|
||
|
sd->state = sd_up;
|
||
|
}
|
||
|
statemap = sdstatemap(plex, &sddowncount); /* get the new state map */
|
||
|
/* Do we need reborn? They should now all be up */
|
||
|
if (statemap == (statemap & (sd_upstate | sd_rebornstate))) { /* got something we can use */
|
||
|
plex->state = plex_reviving; /* we need reviving */
|
||
|
return EAGAIN;
|
||
|
} else
|
||
|
plex->state = plex_down; /* still in error */
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case volplex_allup: /* all plexes are up */
|
||
|
case volplex_someup:
|
||
|
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||
|
break; /* no change */
|
||
|
else
|
||
|
plex->state = plex_degraded; /* we're not all there */
|
||
|
}
|
||
|
|
||
|
if (plex->state != oldstate)
|
||
|
break;
|
||
|
return 0; /* no change */
|
||
|
|
||
|
case plex_down: /* want to take it down */
|
||
|
if (((vps == volplex_onlyus) /* we're the only one up */
|
||
|
||(vps == volplex_onlyusup)) /* we're the only one up */
|
||
|
&&(!(flags & setstate_force))) /* and we don't want to use force */
|
||
|
return 0; /* can't do it */
|
||
|
plex->state = state; /* do it */
|
||
|
break;
|
||
|
|
||
|
/* This is only requested by the driver.
|
||
|
* Trust ourselves */
|
||
|
case plex_faulty:
|
||
|
plex->state = state; /* do it */
|
||
|
break;
|
||
|
|
||
|
case plex_initializing:
|
||
|
/* XXX consider what safeguards we need here */
|
||
|
if ((flags & setstate_force) == 0)
|
||
|
return 0;
|
||
|
plex->state = state; /* do it */
|
||
|
break;
|
||
|
|
||
|
/* What's this? */
|
||
|
default:
|
||
|
return 0;
|
||
|
}
|
||
|
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||
|
/* Now see what we have left, and whether
|
||
|
* we're taking the volume down */
|
||
|
if (plex->volno >= 0) { /* we have a volume */
|
||
|
struct volume *vol = &VOL[plex->volno];
|
||
|
|
||
|
vps = vpstate(plex); /* get our combined state again */
|
||
|
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
|
||
|
if ((vol->state == volume_up)
|
||
|
&& (vps == volplex_alldown)) /* and we're all down */
|
||
|
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
|
||
|
else if ((vol->state == volume_down)
|
||
|
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
|
||
|
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
|
||
|
}
|
||
|
}
|
||
|
if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
|
||
|
if (flags & setstate_noupdate) /* don't update now, */
|
||
|
vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
|
||
|
else
|
||
|
save_config(); /* yes: save the updated configuration */
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Update the state of a plex dependent on its plexes.
|
||
|
* Also rebuild the unmapped_region and defective_region table */
|
||
|
int
|
||
|
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
||
|
{
|
||
|
int plexno;
|
||
|
enum plexstates {
|
||
|
plex_downstate = 1, /* found a plex which is down */
|
||
|
plex_degradedstate = 2, /* found a plex which is halfway up */
|
||
|
plex_upstate = 4 /* found a plex which is completely up */
|
||
|
};
|
||
|
|
||
|
int plexstatemap = 0; /* note the states we find */
|
||
|
struct volume *vol = &VOL[volno]; /* point to our volume */
|
||
|
|
||
|
if (vol->state == state) /* we're there already */
|
||
|
return 0; /* no change */
|
||
|
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
||
|
return 0;
|
||
|
|
||
|
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||
|
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
||
|
switch (plex->state) {
|
||
|
case plex_degraded:
|
||
|
case plex_flaky:
|
||
|
case plex_reviving:
|
||
|
plexstatemap |= plex_degradedstate;
|
||
|
break;
|
||
|
|
||
|
case plex_up:
|
||
|
plexstatemap |= plex_upstate;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
plexstatemap |= plex_downstate;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (state == volume_up) { /* want to come up */
|
||
|
if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */
|
||
|
vol->state = volume_up; /* did it */
|
||
|
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||
|
if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
|
||
|
if (flags & setstate_noupdate) /* don't update now, */
|
||
|
vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
|
||
|
else
|
||
|
save_config(); /* yes: save the updated configuration */
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
/* Here we should check whether we have enough
|
||
|
* coverage for the complete volume. Writeme XXX */
|
||
|
} else if (state == volume_down) { /* want to go down */
|
||
|
if ((vol->opencount == 0) /* not open */
|
||
|
||(flags & setstate_force != 0)) { /* or we're forcing */
|
||
|
vol->state = volume_down;
|
||
|
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||
|
if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
|
||
|
if (flags & setstate_noupdate) /* don't update now, */
|
||
|
vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
|
||
|
else
|
||
|
save_config(); /* yes: save the updated configuration */
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
return 0; /* no change */
|
||
|
}
|
||
|
|
||
|
/* Start an object, in other words do what we can to get it up.
|
||
|
* This is called from vinumioctl (VINUMSTART).
|
||
|
* Return error indications via ioctl_reply
|
||
|
*/
|
||
|
void
|
||
|
start_object(struct vinum_ioctl_msg *data)
|
||
|
{
|
||
|
int status;
|
||
|
int realstatus; /* what we really have */
|
||
|
int objindex = data->index; /* data gets overwritten */
|
||
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||
|
|
||
|
switch (data->type) {
|
||
|
case drive_object:
|
||
|
status = set_drive_state(objindex, drive_up, setstate_none);
|
||
|
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
|
||
|
break;
|
||
|
|
||
|
case sd_object:
|
||
|
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
|
||
|
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
|
||
|
break;
|
||
|
|
||
|
case plex_object:
|
||
|
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
|
||
|
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
||
|
ioctl_reply->msg[0] = '\0'; /* no comment */
|
||
|
return;
|
||
|
}
|
||
|
status = set_plex_state(objindex, plex_up, setstate_none);
|
||
|
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
|
||
|
break;
|
||
|
|
||
|
case volume_object:
|
||
|
status = set_volume_state(objindex, volume_up, setstate_none);
|
||
|
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
||
|
return;
|
||
|
}
|
||
|
/* There's no point in saying anything here:
|
||
|
* the userland program does it better */
|
||
|
ioctl_reply->msg[0] = '\0';
|
||
|
if (realstatus == 0) /* couldn't do it */
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
else
|
||
|
ioctl_reply->error = 0;
|
||
|
}
|
||
|
|
||
|
/* Stop an object, in other words do what we can to get it down
|
||
|
* This is called from vinumioctl (VINUMSTOP).
|
||
|
* Return error indications via ioctl_reply.
|
||
|
*/
|
||
|
void
|
||
|
stop_object(struct vinum_ioctl_msg *data)
|
||
|
{
|
||
|
int status = 1;
|
||
|
int objindex = data->index; /* save the number from change */
|
||
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||
|
|
||
|
switch (data->type) {
|
||
|
case drive_object:
|
||
|
status = set_drive_state(objindex, drive_down, data->force);
|
||
|
break;
|
||
|
|
||
|
case sd_object:
|
||
|
status = set_sd_state(objindex, sd_down, data->force);
|
||
|
break;
|
||
|
|
||
|
case plex_object:
|
||
|
status = set_plex_state(objindex, plex_down, data->force);
|
||
|
break;
|
||
|
|
||
|
case volume_object:
|
||
|
status = set_volume_state(objindex, volume_down, data->force);
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
strcpy(ioctl_reply->msg, "Invalid object type");
|
||
|
return;
|
||
|
}
|
||
|
ioctl_reply->msg[0] = '\0';
|
||
|
if (status == 0) /* couldn't do it */
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
else
|
||
|
ioctl_reply->error = 0;
|
||
|
}
|
||
|
|
||
|
/* VINUM_SETSTATE ioctl: set an object state
|
||
|
* msg is the message passed by the user */
|
||
|
void
|
||
|
setstate(struct vinum_ioctl_msg *msg)
|
||
|
{
|
||
|
int sdno;
|
||
|
struct sd *sd;
|
||
|
struct plex *plex;
|
||
|
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
||
|
|
||
|
switch (msg->state) {
|
||
|
case object_down:
|
||
|
stop_object(msg);
|
||
|
break;
|
||
|
|
||
|
case object_initializing:
|
||
|
switch (msg->type) {
|
||
|
case sd_object:
|
||
|
sd = &SD[msg->index];
|
||
|
if ((msg->index >= vinum_conf.subdisks_used)
|
||
|
|| (sd->state == sd_unallocated)) {
|
||
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||
|
ioctl_reply->error = EFAULT;
|
||
|
return;
|
||
|
}
|
||
|
set_sd_state(msg->index, sd_initializing, msg->force);
|
||
|
if (sd->state != sd_initializing) {
|
||
|
strcpy(ioctl_reply->msg, "Can't set state");
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
} else
|
||
|
ioctl_reply->error = 0;
|
||
|
break;
|
||
|
|
||
|
case plex_object:
|
||
|
plex = &PLEX[msg->index];
|
||
|
if ((msg->index >= vinum_conf.plexes_used)
|
||
|
|| (plex->state == plex_unallocated)) {
|
||
|
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||
|
ioctl_reply->error = EFAULT;
|
||
|
return;
|
||
|
}
|
||
|
set_plex_state(msg->index, plex_initializing, msg->force);
|
||
|
if (plex->state != plex_initializing) {
|
||
|
strcpy(ioctl_reply->msg, "Can't set state");
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
} else {
|
||
|
ioctl_reply->error = 0;
|
||
|
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||
|
sd = &SD[plex->sdnos[sdno]];
|
||
|
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
||
|
if (sd->state != sd_initializing) {
|
||
|
strcpy(ioctl_reply->msg, "Can't set state");
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
strcpy(ioctl_reply->msg, "Invalid object");
|
||
|
ioctl_reply->error = EINVAL;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case object_up:
|
||
|
start_object(msg);
|
||
|
}
|
||
|
}
|