We can't release the refcount outside of the periph lock.

We were dropping the periph lock and then decrementing the refcount.
That violates the locking protocol and is racy. This seems to be
the cause of the weird, occasional panics with a bogus assert.

Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D15517
This commit is contained in:
Warner Losh 2018-05-24 16:31:18 +00:00
parent 5f77b8a88b
commit b1988d44b3

View File

@ -336,6 +336,8 @@ ndaclose(struct disk *dp)
while (softc->refcount != 0)
cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
KASSERT(softc->outstanding_cmds == 0,
("nda %d outstanding commands", softc->outstanding_cmds));
cam_periph_unlock(periph);
cam_periph_release(periph);
return (0);
@ -986,10 +988,11 @@ ndastart(struct cam_periph *periph, union ccb *start_ccb)
out:
start_ccb->ccb_h.flags |= CAM_UNLOCKED;
softc->outstanding_cmds++;
softc->refcount++;
softc->refcount++; /* For submission only */
cam_periph_unlock(periph);
xpt_action(start_ccb);
cam_periph_lock(periph);
softc->refcount--; /* Submission done */
/* May have more work to do, so ensure we stay scheduled */
ndaschedule(periph);
@ -1085,6 +1088,7 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb)
bp1 = TAILQ_FIRST(&queue);
cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb);
xpt_release_ccb(done_ccb);
softc->outstanding_cmds--;
ndaschedule(periph);
cam_periph_unlock(periph);
while ((bp2 = TAILQ_FIRST(&queue)) != NULL) {
@ -1100,11 +1104,6 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb)
biodone(bp2);
}
}
/*
* Release the periph refcount taken in ndastart() for each CCB.
*/
KASSERT(softc->refcount >= 1, ("ndadone softc %p refcount %d", softc, softc->refcount));
softc->refcount--;
return;
}
case NDA_CCB_DUMP: