mpr(4): Handle mprsas_alloc_tm() errors on device removal.

SAS9305-16e with firmware 16.00.01.00 reports a HighPriorityCredit of
only 8, while for comparison some other combinations I have report
100 or even 128.  When a large JBOD is detached, the requirement to
send a target reset command to every target at the same time overflows
that limit, and without adequate handling leaves the devices stuck in a
half-detached state, preventing later re-attach.

To handle that, in case of an allocation error mark the target with the
new MPRSAS_TARGET_TOREMOVE flag, and retry the removal attempt the next
time something else frees a high priority command.  With this patch I
can successfully detach/attach a 102-disk JBOD from/to the SAS9305-16e.

MFC after:	2 weeks
Sponsored by:	iXsystems, Inc.
This commit is contained in:
Alexander Motin 2021-08-20 09:46:51 -04:00
parent 719b5397c2
commit e3c5965c25
4 changed files with 40 additions and 6 deletions

View File

@ -412,6 +412,34 @@ mprsas_remove_volume(struct mpr_softc *sc, struct mpr_command *tm)
mprsas_free_tm(sc, tm);
}
/*
 * Re-drive target removals that were deferred earlier.
 *
 * A target carrying MPRSAS_TARGET_TOREMOVE is one whose removal still has
 * to be started.  Walk the target table and restart the removal of each
 * such target, picking the volume or the bare-device path according to
 * MPR_TARGET_FLAGS_VOLUME.
 *
 * The walk stops early, leaving MPRSAS_TOREMOVE set on the softc, as soon
 * as a pending target is found while the high priority request list is
 * empty; the remaining targets keep their per-target flag for a later call.
 * Once the whole table has been walked the softc-wide MPRSAS_TOREMOVE flag
 * is cleared.
 *
 * NOTE(review): no locking is taken here; presumably the caller already
 * holds the softc lock -- confirm against the call sites.
 */
void
mprsas_prepare_remove_retry(struct mprsas_softc *sassc)
{
	struct mprsas_target *targ;
	int idx;

	/* Fast path: no removal is waiting to be retried. */
	if (!(sassc->flags & MPRSAS_TOREMOVE))
		return;

	for (idx = 0; idx < sassc->maxtargets; idx++) {
		targ = sassc->targets + idx;
		if ((targ->flags & MPRSAS_TARGET_TOREMOVE) != 0) {
			/*
			 * No free high priority command: give up for now and
			 * keep MPRSAS_TOREMOVE set so a later call resumes.
			 */
			if (TAILQ_EMPTY(&sassc->sc->high_priority_req_list))
				return;

			/* Clear the mark before restarting the removal. */
			targ->flags &= ~MPRSAS_TARGET_TOREMOVE;
			if ((targ->flags & MPR_TARGET_FLAGS_VOLUME) == 0)
				mprsas_prepare_remove(sassc, targ->handle);
			else
				mprsas_prepare_volume_remove(sassc,
				    targ->handle);
		}
	}

	/* Every target has been visited; nothing is left pending. */
	sassc->flags &= ~MPRSAS_TOREMOVE;
}
/*
* No Need to call "MPI2_SAS_OP_REMOVE_DEVICE" For Volume removal.
* Otherwise Volume Delete is same as Bare Drive Removal.
@ -440,8 +468,8 @@ mprsas_prepare_volume_remove(struct mprsas_softc *sassc, uint16_t handle)
cm = mprsas_alloc_tm(sc);
if (cm == NULL) {
mpr_dprint(sc, MPR_ERROR,
"%s: command alloc failure\n", __func__);
targ->flags |= MPRSAS_TARGET_TOREMOVE;
sassc->flags |= MPRSAS_TOREMOVE;
return;
}
@ -506,8 +534,8 @@ mprsas_prepare_remove(struct mprsas_softc *sassc, uint16_t handle)
tm = mprsas_alloc_tm(sc);
if (tm == NULL) {
mpr_dprint(sc, MPR_ERROR, "%s: command alloc failure\n",
__func__);
targ->flags |= MPRSAS_TARGET_TOREMOVE;
sassc->flags |= MPRSAS_TOREMOVE;
return;
}

View File

@ -57,8 +57,7 @@ struct mprsas_target {
#define MPR_TARGET_FLAGS_RAID_COMPONENT (1 << 4)
#define MPR_TARGET_FLAGS_VOLUME (1 << 5)
#define MPR_TARGET_IS_SATA_SSD (1 << 6)
#define MPRSAS_TARGET_INRECOVERY (MPRSAS_TARGET_INABORT | \
MPRSAS_TARGET_INRESET | MPRSAS_TARGET_INCHIPRESET)
#define MPRSAS_TARGET_TOREMOVE (1 << 7)
uint16_t tid;
SLIST_HEAD(, mprsas_lun) luns;
@ -95,6 +94,7 @@ struct mprsas_softc {
#define MPRSAS_DISCOVERY_TIMEOUT_PENDING (1 << 2)
#define MPRSAS_QUEUE_FROZEN (1 << 3)
#define MPRSAS_SHUTDOWN (1 << 4)
#define MPRSAS_TOREMOVE (1 << 5)
u_int maxtargets;
struct mprsas_target *targets;
struct cam_devq *devq;

View File

@ -1428,6 +1428,7 @@ mprsas_volume_add(struct mpr_softc *sc, u16 handle)
targ->tid = id;
targ->handle = handle;
targ->devname = wwid;
targ->flags = MPR_TARGET_FLAGS_VOLUME;
TAILQ_INIT(&targ->commands);
TAILQ_INIT(&targ->timedout_commands);
while (!SLIST_EMPTY(&targ->luns)) {

View File

@ -668,6 +668,8 @@ mpr_alloc_command(struct mpr_softc *sc)
return (cm);
}
void mprsas_prepare_remove_retry(struct mprsas_softc *sassc);
static __inline void
mpr_free_high_priority_command(struct mpr_softc *sc, struct mpr_command *cm)
{
@ -691,6 +693,9 @@ mpr_free_high_priority_command(struct mpr_softc *sc, struct mpr_command *cm)
mpr_free_chain(sc, chain);
}
TAILQ_INSERT_TAIL(&sc->high_priority_req_list, cm, cm_link);
if (sc->sassc)
mprsas_prepare_remove_retry(sc->sassc);
}
static __inline struct mpr_command *