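set_drive_state: Update subdisk states more correctly: decide whether
to update them by comparing the drive's actual state, rather than the
requested state, with its old state.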

set_sd_state: Update the state of a subdisk in a multi-plex volume
more correctly.

update_plex_state: Bring the plex up correctly when the last subdisk
comes up.

checksdstate: Update comments.

vpstate: Don't return an "up" state on a degraded, unattached plex.

start_object: Return a sensible error message when trying to revive a
subdisk whose drive is down.  Previously it returned EBUSY; it now
returns EIO with the message "Drive is down".

Approved-by:  jkh
This commit is contained in:
grog 2000-02-29 06:16:06 +00:00
parent 5b134f56e8
commit 4d73d96e4c

View File

@ -68,19 +68,19 @@ set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
"vinum: drive %s is %s\n",
drive->label.name,
drive_state(drive->state));
if ((drive->state == drive_up)
&& (drive->vp == NULL)) /* should be open, but we're not */
init_drive(drive, 1); /* which changes the state again */
if (newstate < drive_up) /* drive going down, */
queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */
(union daemoninfo) drive);
if (newstate != oldstate) { /* state has changed */
if (drive->state != oldstate) { /* state has changed */
for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
if ((SD[sdno].state >= sd_referenced)
&& (SD[sdno].driveno == driveno)) /* belongs to this drive */
update_sd_state(sdno); /* update the state */
}
}
if (newstate == drive_up) { /* want to bring it up */
if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */
init_drive(drive, 1); /* which changes the state again */
} else /* taking it down or worse */
queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */
(union daemoninfo) drive);
if ((flags & setstate_configuring) == 0) /* configuring? */
save_config(); /* no: save the updated configuration now */
return 1;
@ -145,6 +145,11 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
if (flags & setstate_force) /* forcing it, */
break; /* just do it, and damn the consequences */
switch (sd->state) {
/*
* Perform the necessary tests. To allow
* the state transition, just break out of
* the switch.
*/
case sd_crashed:
case sd_reborn:
case sd_down: /* been down, no data lost */
@ -160,7 +165,7 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
if (oldstate != sd_reborn) {
sd->state = sd_reborn; /* here it is again */
log(LOG_INFO,
"vinum: subdisk %s is %s, not %s\n",
"vinum: %s is %s, not %s\n",
sd->name,
sd_state(sd->state),
sd_state(newstate));
@ -177,21 +182,22 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
case sd_empty:
case sd_initialized:
/*
* If we're associated with a plex which
* is down, or which is the only one in
* the volume, and we're not a RAID-5
* plex, we can come up without being
* inconsistent. Internally, we use the
* force flag to bring up a RAID-5 plex
* after initialization.
* If we're not part of a plex, or the
* plex is not part of a volume with other
* plexes which are up, we can come up
* without being inconsistent.
*
* If we're part of a parity plex, we'll
* come up if the caller uses force. This
* is the way we bring them up after
* initialization.
*/
if ((sd->plexno >= 0)
&& ((PLEX[sd->plexno].organization != plex_raid5)
|| (flags & setstate_force))
&& ((PLEX[sd->plexno].state < plex_firstup)
|| (PLEX[sd->plexno].volno < 0)
|| (VOL[PLEX[sd->plexno].volno].plexes == 1)))
if ((sd->plexno < 0)
|| ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
|| (isparity((&PLEX[sd->plexno]))
&& (flags & setstate_force)))
break;
/* Otherwise it's just out of date */
/* FALLTHROUGH */
@ -204,16 +210,16 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
*
* 2. If the subdisk is part of a
* one-plex volume or an unattached
* plex, and it's not RAID-5, we
* *can't revive*. The subdisk
* doesn't change its state.
* plex, and it's not RAID-4 or
* RAID-5, we *can't revive*. The
* subdisk doesn't change its state.
*
* 3. If the subdisk is part of a
* one-plex volume or an unattached
* plex, and it's RAID-5, but more
* than one subdisk is down, we *still
* can't revive*. The subdisk doesn't
* change its state.
* plex, and it's RAID-4 or RAID-5,
* but more than one subdisk is down,
* we *still can't revive*. The
* subdisk doesn't change its state.
*
* 4. If the subdisk is part of a
* multi-plex volume, we'll change to
@ -236,11 +242,12 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
*
* 1: we don't have a volume
* 2: we're the only plex in the volume
* 3: we're a RAID-5 plex, and more than one subdisk is down.
* 3: we're a RAID-4 or RAID-5 plex, and
* more than one subdisk is down.
*/
if (((vol == NULL)
|| (vol->plexes == 1))
&& ((plex->organization != plex_raid5)
&& ((!isparity(plex))
|| (plex->sddowncount > 1))) {
if (sd->state == sd_initializing) /* it's finished initializing */
sd->state = sd_initialized;
@ -493,9 +500,20 @@ update_plex_state(int plexno)
if (statemap & sd_initstate) /* something initializing? */
plex->state = plex_initializing; /* yup, that makes the plex the same */
else if ((statemap == sd_emptystate) /* all subdisks empty */
||(statemap == sd_initializedstate)) { /* or all initialized */
if ((vps & volplex_otherup) == 0) { /* no other plex is up */
else if (statemap == sd_upstate)
/*
* All the subdisks are up. This also means that
* they are consistent, so we can just bring
* the plex up
*/
plex->state = plex_up;
else if (isparity(plex) /* RAID-4 or RAID-5 plex */
&&(plex->sddowncount == 1)) /* and exactly one subdisk down */
plex->state = plex_degraded; /* limping a bit */
else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */
||((statemap & ~sd_downstate)
== (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
if ((vps & volplex_otherup) == 0) { /* no other plex is up */
struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */
/*
@ -521,38 +539,22 @@ update_plex_state(int plexno)
for (plexno = 0; plexno < vol->plexes; plexno++)
forceup(VOL[plex->volno].plex[plexno]);
} else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */
||(plex->organization == plex_concat) /* and we're not RAID-5 */
||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */
||(plex->organization == plex_striped))
forceup(plexno); /* we'll do it */
/*
* This leaves a case where things don't get
* done: the plex is RAID-5, and the subdisks
* are all empty. They need to be initialized
* first.
* done: the plex is RAID-4 or RAID-5, and
* the subdisks are all empty. They need to
* be initialized first.
*/
} else { /* another plex is up */
int sdno;
plex->state = plex_faulty; /* and bring it up */
/* change the subdisks to up state */
for (sdno = 0; sdno < plex->subdisks; sdno++) {
SD[plex->sdnos[sdno]].state = sd_stale;
log(LOG_INFO, /* tell them about it */
"vinum: %s must be revived\n",
SD[plex->sdnos[sdno]].name);
}
} else {
if (statemap == sd_upstate) /* all subdisks up */
plex->state = plex_up; /* we can come up too */
else
plex->state = plex_faulty;
}
} else if (statemap == sd_upstate)
/*
* All the subdisks are up. This also means that
* they are consistent, so we can just bring
* the plex up
*/
plex->state = plex_up;
else if ((plex->organization == plex_raid5) /* raid 5 plex */
&&(plex->sddowncount == 1)) /* and exactly one subdisk down */
plex->state = plex_degraded; /* limping a bit */
else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
} else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
plex->state = plex_flaky;
else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */
plex->state = plex_corrupt; /* corrupt */
@ -626,21 +628,25 @@ checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t disken
/*
* Access to a reviving subdisk depends on the
* organization of the plex:
* - If it's concatenated, access the subdisk up to its current
* revive point. If we want to write to the subdisk overlapping the
* current revive block, set the conflict flag in the request, asking
* the caller to put the request on the wait list, which will be
* attended to by revive_block when it's done.
* - if it's striped, we can't do it (we could do some hairy
* calculations, but it's unlikely to work).
* - if it's RAID-5, we can do it as long as only one
* subdisk is down
*
* - If it's concatenated, access the subdisk
* up to its current revive point. If we
* want to write to the subdisk overlapping
* the current revive block, set the
* conflict flag in the request, asking the
* caller to put the request on the wait
* list, which will be attended to by
* revive_block when it's done.
* - if it's striped, we can't do it (we could
* do some hairy calculations, but it's
* unlikely to work).
* - if it's RAID-4 or RAID-5, we can do it as
* long as only one subdisk is down
*/
if (plex->state == plex_striped) /* plex is striped, */
return REQUEST_DOWN;
else if (plex->state == plex_raid5) { /* RAID5 plex */
else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */
if (plex->sddowncount > 1) /* with more than one sd down, */
return REQUEST_DOWN;
else
@ -770,9 +776,12 @@ vpstate(struct plex *plex)
enum volplexstate state = volplex_onlyusdown; /* state to return */
int plexno;
if (plex->volno < 0) /* not associated with a volume */
return volplex_onlyusdown; /* assume the worst */
if (plex->volno < 0) { /* not associated with a volume */
if (plex->state > plex_degraded)
return volplex_onlyus; /* just us */
else
return volplex_onlyusdown; /* assume the worst */
}
vol = &VOL[plex->volno]; /* point to our volume */
for (plexno = 0; plexno < vol->plexes; plexno++) {
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
@ -856,6 +865,11 @@ start_object(struct vinum_ioctl_msg *data)
break;
case sd_object:
if (DRIVE[SD[objindex].driveno].state != drive_up) {
ioctl_reply->error = EIO;
strcpy(ioctl_reply->msg, "Drive is down");
return;
}
if ((SD[objindex].state == sd_reviving) /* reviving, */
||(SD[objindex].state == sd_stale)) { /* or stale, will revive */
SD[objindex].state = sd_reviving; /* make sure we're reviving */
@ -877,7 +891,8 @@ start_object(struct vinum_ioctl_msg *data)
ioctl_reply->error = EBUSY;
else
ioctl_reply->error = 0;
}
} else
ioctl_reply->error = status;
break;
case plex_object:
@ -950,8 +965,8 @@ stop_object(struct vinum_ioctl_msg *data)
}
/*
* VINUM_SETSTATE ioctl: set an object state
* msg is the message passed by the user
* VINUM_SETSTATE ioctl: set an object state.
* msg is the message passed by the user.
*/
void
setstate(struct vinum_ioctl_msg *msg)