freebsd-dev/sys/cam/scsi/scsi_pt.c

633 lines
15 KiB
C
Raw Normal View History

/*-
* Implementation of SCSI Processor Target Peripheral driver for CAM.
*
* Copyright (c) 1998 Justin T. Gibbs.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification, immediately at the beginning of the file.
* 2. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
2003-06-10 18:14:05 +00:00
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/bio.h>
#include <sys/devicestat.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ptio.h>
#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_periph.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_debug.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_message.h>
#include <cam/scsi/scsi_pt.h>
#include "opt_pt.h"
typedef enum {
PT_STATE_PROBE,
PT_STATE_NORMAL
} pt_state;
typedef enum {
PT_FLAG_NONE = 0x00,
PT_FLAG_OPEN = 0x01,
PT_FLAG_DEVICE_INVALID = 0x02,
PT_FLAG_RETRY_UA = 0x04
} pt_flags;
typedef enum {
PT_CCB_BUFFER_IO = 0x01,
PT_CCB_RETRY_UA = 0x04,
PT_CCB_BUFFER_IO_UA = PT_CCB_BUFFER_IO|PT_CCB_RETRY_UA
} pt_ccb_state;
/* Offsets into our private area for storing information */
#define ccb_state ppriv_field0
#define ccb_bp ppriv_ptr1
struct pt_softc {
struct bio_queue_head bio_queue;
struct devstat *device_stats;
LIST_HEAD(, ccb_hdr) pending_ccbs;
pt_state state;
pt_flags flags;
union ccb saved_ccb;
int io_timeout;
struct cdev *dev;
};
static d_open_t ptopen;
static d_close_t ptclose;
static d_strategy_t ptstrategy;
static periph_init_t ptinit;
static void ptasync(void *callback_arg, u_int32_t code,
struct cam_path *path, void *arg);
static periph_ctor_t ptctor;
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
static periph_oninv_t ptoninvalidate;
static periph_dtor_t ptdtor;
static periph_start_t ptstart;
static void ptdone(struct cam_periph *periph,
union ccb *done_ccb);
static d_ioctl_t ptioctl;
static int pterror(union ccb *ccb, u_int32_t cam_flags,
u_int32_t sense_flags);
void scsi_send_receive(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int tag_action, int readop, u_int byte2,
u_int32_t xfer_len, u_int8_t *data_ptr,
u_int8_t sense_len, u_int32_t timeout);
static struct periph_driver ptdriver =
{
ptinit, "pt",
TAILQ_HEAD_INITIALIZER(ptdriver.units), /* generation */ 0
};
PERIPHDRIVER_DECLARE(pt, ptdriver);
static struct cdevsw pt_cdevsw = {
.d_version = D_VERSION,
.d_flags = 0,
.d_open = ptopen,
.d_close = ptclose,
.d_read = physread,
.d_write = physwrite,
.d_ioctl = ptioctl,
.d_strategy = ptstrategy,
.d_name = "pt",
};
#ifndef SCSI_PT_DEFAULT_TIMEOUT
#define SCSI_PT_DEFAULT_TIMEOUT 60
#endif
static int
ptopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
struct cam_periph *periph;
struct pt_softc *softc;
int error = 0;
periph = (struct cam_periph *)dev->si_drv1;
if (cam_periph_acquire(periph) != CAM_REQ_CMP)
return (ENXIO);
softc = (struct pt_softc *)periph->softc;
cam_periph_lock(periph);
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
if (softc->flags & PT_FLAG_DEVICE_INVALID) {
Work around a race condition in devfs by changing the way closes are handled in most CAM peripheral drivers that are not handled by GEOM's disk class. The usual character driver open and close semantics are that the driver gets N open calls, but only one close, when the last caller closes the device. CAM peripheral drivers expect that behavior to be honored to the letter, and the CAM peripheral driver code (specifically cam_periph_release_locked_busses()) panics if it is done incorrectly. Since devfs has to drop its locks while it calls a driver's close routine, and it does not have a way to delay or prevent open calls while it is calling the close routine, there is a race. The sequence of events, simplified a bit, is: - devfs acquires a lock - devfs checks the reference count, and if it is 1, continues to close. - devfs releases the lock - 2nd process open call on the device happens here - devfs calls the driver's close routine - devfs acquires a lock - devfs decrements the reference count - devfs releases the lock - 2nd process close call on the device happens here At the second close, we get a panic in cam_periph_release_locked_busses(), complaining that peripheral has been released when the reference count is already 0. This is because we have gotten two closes in a row, which should not happen. The fix is to add the D_TRACKCLOSE flag to the driver's cdevsw, so that we get a close() call for each open(). That does happen reliably, so we can make sure that our reference counts are correct. Note that the sa(4) and pt(4) drivers only allow one context through the open routine. So these drivers aren't exposed to the same race condition. scsi_ch.c, scsi_enc.c, scsi_enc_internal.h, scsi_pass.c, scsi_sg.c: For these drivers, change the open() routine to increment the reference count for every open, and just decrement the reference count in the close. Call cam_periph_release_locked() in some scenarios to avoid additional lock and unlock calls. scsi_pt.c: Call cam_periph_release_locked() in some scenarios to avoid additional lock and unlock calls. MFC after: 3 days
2012-05-27 06:11:09 +00:00
cam_periph_release_locked(periph);
cam_periph_unlock(periph);
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
return(ENXIO);
}
if ((softc->flags & PT_FLAG_OPEN) == 0)
softc->flags |= PT_FLAG_OPEN;
else {
error = EBUSY;
cam_periph_release(periph);
}
CAM_DEBUG(periph->path, CAM_DEBUG_TRACE,
("ptopen: dev=%s\n", devtoname(dev)));
cam_periph_unlock(periph);
return (error);
}
static int
ptclose(struct cdev *dev, int flag, int fmt, struct thread *td)
{
struct cam_periph *periph;
struct pt_softc *softc;
periph = (struct cam_periph *)dev->si_drv1;
if (periph == NULL)
return (ENXIO);
softc = (struct pt_softc *)periph->softc;
cam_periph_lock(periph);
softc->flags &= ~PT_FLAG_OPEN;
Work around a race condition in devfs by changing the way closes are handled in most CAM peripheral drivers that are not handled by GEOM's disk class. The usual character driver open and close semantics are that the driver gets N open calls, but only one close, when the last caller closes the device. CAM peripheral drivers expect that behavior to be honored to the letter, and the CAM peripheral driver code (specifically cam_periph_release_locked_busses()) panics if it is done incorrectly. Since devfs has to drop its locks while it calls a driver's close routine, and it does not have a way to delay or prevent open calls while it is calling the close routine, there is a race. The sequence of events, simplified a bit, is: - devfs acquires a lock - devfs checks the reference count, and if it is 1, continues to close. - devfs releases the lock - 2nd process open call on the device happens here - devfs calls the driver's close routine - devfs acquires a lock - devfs decrements the reference count - devfs releases the lock - 2nd process close call on the device happens here At the second close, we get a panic in cam_periph_release_locked_busses(), complaining that peripheral has been released when the reference count is already 0. This is because we have gotten two closes in a row, which should not happen. The fix is to add the D_TRACKCLOSE flag to the driver's cdevsw, so that we get a close() call for each open(). That does happen reliably, so we can make sure that our reference counts are correct. Note that the sa(4) and pt(4) drivers only allow one context through the open routine. So these drivers aren't exposed to the same race condition. scsi_ch.c, scsi_enc.c, scsi_enc_internal.h, scsi_pass.c, scsi_sg.c: For these drivers, change the open() routine to increment the reference count for every open, and just decrement the reference count in the close. Call cam_periph_release_locked() in some scenarios to avoid additional lock and unlock calls. scsi_pt.c: Call cam_periph_release_locked() in some scenarios to avoid additional lock and unlock calls. MFC after: 3 days
2012-05-27 06:11:09 +00:00
cam_periph_release_locked(periph);
cam_periph_unlock(periph);
return (0);
}
/*
* Actually translate the requested transfer into one the physical driver
* can understand. The transfer is described by a buf and will include
* only one physical transfer.
*/
static void
ptstrategy(struct bio *bp)
{
struct cam_periph *periph;
struct pt_softc *softc;
periph = (struct cam_periph *)bp->bio_dev->si_drv1;
bp->bio_resid = bp->bio_bcount;
if (periph == NULL) {
biofinish(bp, NULL, ENXIO);
return;
}
cam_periph_lock(periph);
softc = (struct pt_softc *)periph->softc;
/*
* If the device has been made invalid, error out
*/
if ((softc->flags & PT_FLAG_DEVICE_INVALID)) {
cam_periph_unlock(periph);
biofinish(bp, NULL, ENXIO);
return;
}
/*
* Place it in the queue of disk activities for this disk
*/
bioq_insert_tail(&softc->bio_queue, bp);
/*
* Schedule ourselves for performing the work.
*/
xpt_schedule(periph, CAM_PRIORITY_NORMAL);
cam_periph_unlock(periph);
return;
}
static void
ptinit(void)
{
cam_status status;
/*
* Install a global async callback. This callback will
* receive async callbacks like "new device found".
*/
status = xpt_register_async(AC_FOUND_DEVICE, ptasync, NULL, NULL);
if (status != CAM_REQ_CMP) {
printf("pt: Failed to attach master async callback "
"due to status 0x%x!\n", status);
}
}
static cam_status
ptctor(struct cam_periph *periph, void *arg)
{
struct pt_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
printf("ptregister: no getdev CCB, can't register device\n");
return(CAM_REQ_CMP_ERR);
}
softc = (struct pt_softc *)malloc(sizeof(*softc),M_DEVBUF,M_NOWAIT);
if (softc == NULL) {
printf("daregister: Unable to probe new device. "
"Unable to allocate softc\n");
return(CAM_REQ_CMP_ERR);
}
bzero(softc, sizeof(*softc));
LIST_INIT(&softc->pending_ccbs);
softc->state = PT_STATE_NORMAL;
bioq_init(&softc->bio_queue);
softc->io_timeout = SCSI_PT_DEFAULT_TIMEOUT * 1000;
periph->softc = softc;
bzero(&cpi, sizeof(cpi));
xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
cpi.ccb_h.func_code = XPT_PATH_INQ;
xpt_action((union ccb *)&cpi);
cam_periph_unlock(periph);
softc->device_stats = devstat_new_entry("pt",
periph->unit_number, 0,
DEVSTAT_NO_BLOCKSIZE,
SID_TYPE(&cgd->inq_data) |
XPORT_DEVSTAT_TYPE(cpi.transport),
DEVSTAT_PRIORITY_OTHER);
softc->dev = make_dev(&pt_cdevsw, periph->unit_number, UID_ROOT,
GID_OPERATOR, 0600, "%s%d", periph->periph_name,
periph->unit_number);
2007-04-19 18:14:33 +00:00
cam_periph_lock(periph);
softc->dev->si_drv1 = periph;
/*
* Add async callbacks for bus reset and
* bus device reset calls. I don't bother
* checking if this fails as, in most cases,
* the system will function just fine without
* them and the only alternative would be to
* not attach the device on failure.
*/
xpt_register_async(AC_SENT_BDR | AC_BUS_RESET | AC_LOST_DEVICE,
ptasync, periph, periph->path);
/* Tell the user we've attached to the device */
xpt_announce_periph(periph, NULL);
return(CAM_REQ_CMP);
}
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
static void
ptoninvalidate(struct cam_periph *periph)
{
struct pt_softc *softc;
softc = (struct pt_softc *)periph->softc;
/*
* De-register any async callbacks.
*/
xpt_register_async(0, ptasync, periph, periph->path);
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
softc->flags |= PT_FLAG_DEVICE_INVALID;
/*
* Return all queued I/O with ENXIO.
* XXX Handle any transactions queued to the card
* with XPT_ABORT_CCB.
*/
bioq_flush(&softc->bio_queue, NULL, ENXIO);
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
}
static void
ptdtor(struct cam_periph *periph)
{
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
struct pt_softc *softc;
softc = (struct pt_softc *)periph->softc;
devstat_remove_entry(softc->device_stats);
cam_periph_unlock(periph);
destroy_dev(softc->dev);
cam_periph_lock(periph);
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
free(softc, M_DEVBUF);
}
static void
ptasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg)
{
struct cam_periph *periph;
periph = (struct cam_periph *)callback_arg;
switch (code) {
case AC_FOUND_DEVICE:
{
struct ccb_getdev *cgd;
cam_status status;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL)
break;
Separate the parallel scsi knowledge out of the core of the XPT, and modularize it so that new transports can be created. Add a transport for SATA Add a periph+protocol layer for ATA Add a driver for AHCI-compliant hardware. Add a maxio field to CAM so that drivers can advertise their max I/O capability. Modify various drivers so that they are insulated from the value of MAXPHYS. The new ATA/SATA code supports AHCI-compliant hardware, and will override the classic ATA driver if it is loaded as a module at boot time or compiled into the kernel. The stack now support NCQ (tagged queueing) for increased performance on modern SATA drives. It also supports port multipliers. ATA drives are accessed via 'ada' device nodes. ATAPI drives are accessed via 'cd' device nodes. They can all be enumerated and manipulated via camcontrol, just like SCSI drives. SCSI commands are not translated to their ATA equivalents; ATA native commands are used throughout the entire stack, including camcontrol. See the camcontrol manpage for further details. Testing this code may require that you update your fstab, and possibly modify your BIOS to enable AHCI functionality, if available. This code is very experimental at the moment. The userland ABI/API has changed, so applications will need to be recompiled. It may change further in the near future. The 'ada' device name may also change as more infrastructure is completed in this project. The goal is to eventually put all CAM busses and devices until newbus, allowing for interesting topology and management options. Few functional changes will be seen with existing SCSI/SAS/FC drivers, though the userland ABI has still changed. In the future, transports specific modules for SAS and FC may appear in order to better support the topologies and capabilities of these technologies. The modularization of CAM and the addition of the ATA/SATA modules is meant to break CAM out of the mold of being specific to SCSI, letting it grow to be a framework for arbitrary transports and protocols. It also allows drivers to be written to support discrete hardware without jeopardizing the stability of non-related hardware. While only an AHCI driver is provided now, a Silicon Image driver is also in the works. Drivers for ICH1-4, ICH5-6, PIIX, classic IDE, and any other hardware is possible and encouraged. Help with new transports is also encouraged. Submitted by: scottl, mav Approved by: re
2009-07-10 08:18:08 +00:00
if (cgd->protocol != PROTO_SCSI)
break;
if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
break;
if (SID_TYPE(&cgd->inq_data) != T_PROCESSOR)
break;
/*
* Allocate a peripheral instance for
* this device and start the probe
* process.
*/
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
status = cam_periph_alloc(ptctor, ptoninvalidate, ptdtor,
ptstart, "pt", CAM_PERIPH_BIO,
Merge CAM locking changes from the projects/camlock branch to radically reduce lock congestion and improve SMP scalability of the SCSI/ATA stack, preparing the ground for the coming next GEOM direct dispatch support. Replace big per-SIM locks with bunch of smaller ones: - per-LUN locks to protect device and peripheral drivers state; - per-target locks to protect list of LUNs on target; - per-bus locks to protect reference counting; - per-send queue locks to protect queue of CCBs to be sent; - per-done queue locks to protect queue of completed CCBs; - remaining per-SIM locks now protect only HBA driver internals. While holding LUN lock it is allowed (while not recommended for performance reasons) to take SIM lock. The opposite acquisition order is forbidden. All the other locks are leaf locks, that can be taken anywhere, but should not be cascaded. Many functions, such as: xpt_action(), xpt_done(), xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM lock to be held. To keep compatibility and solve cases where SIM lock can't be dropped, all xpt_async() calls in addition to xpt_done() calls are queued to completion threads for async processing in clean environment without SIM lock held. Instead of single CAM SWI thread, used for commands completion processing before, use multiple (depending on number of CPUs) threads. Load balanced between them using "hash" of the device B:T:L address. HBA drivers that can drop SIM lock during completion processing and have sufficient number of completion threads to efficiently scale to multiple CPUs can use new function xpt_done_direct() to avoid extra context switch. Make ahci(4) driver to use this mechanism depending on hardware setup. Sponsored by: iXsystems, Inc. MFC after: 2 months
2013-10-21 12:00:26 +00:00
path, ptasync,
Fix a problem with the way we handled device invalidation when attaching to a device failed. In theory, the same steps that happen when we get an AC_LOST_DEVICE async notification should have been taken when a driver fails to attach. In practice, that wasn't the case. This only affected the da, cd and ch drivers, but the fix affects all peripheral drivers. There were several possible problems: - In the da driver, we didn't remove the peripheral's softc from the da driver's linked list of softcs. Once the peripheral and softc got removed, we'd get a kernel panic the next time the timeout routine called dasendorderedtag(). - In the da, cd and possibly ch drivers, we didn't remove the peripheral's devstat structure from the devstat queue. Once the peripheral and softc were removed, this could cause a panic if anyone tried to access device statistics. (one component of the linked list wouldn't exist anymore) - In the cd driver, we didn't take the peripheral off the changer run queue if it was scheduled to run. In practice, it's highly unlikely, and maybe impossible that the peripheral would have been on the changer run queue at that stage of the probe process. The fix is: - Add a new peripheral callback function (the "oninvalidate" function) that is called the first time cam_periph_invalidate() is called for a peripheral. - Create new foooninvalidate() routines for each peripheral driver. This routine is always called at splsoftcam(), and contains all the stuff that used to be in the AC_LOST_DEVICE case of the async callback handler. - Move the devstat cleanup call to the destructor/cleanup routines, since some of the drivers do I/O in their close routines. - Make sure that when we're flushing the buffer queue, we traverse it at splbio(). - Add a check for the invalid flag in the pt driver's open routine. Reviewed by: gibbs
1998-10-22 22:16:56 +00:00
AC_FOUND_DEVICE, cgd);
if (status != CAM_REQ_CMP
&& status != CAM_REQ_INPROG)
printf("ptasync: Unable to attach to new device "
"due to status 0x%x\n", status);
break;
}
case AC_SENT_BDR:
case AC_BUS_RESET:
{
struct pt_softc *softc;
struct ccb_hdr *ccbh;
softc = (struct pt_softc *)periph->softc;
/*
* Don't fail on the expected unit attention
* that will occur.
*/
softc->flags |= PT_FLAG_RETRY_UA;
LIST_FOREACH(ccbh, &softc->pending_ccbs, periph_links.le)
ccbh->ccb_state |= PT_CCB_RETRY_UA;
}
/* FALLTHROUGH */
default:
cam_periph_async(periph, code, path, arg);
break;
}
}
static void
ptstart(struct cam_periph *periph, union ccb *start_ccb)
{
struct pt_softc *softc;
struct bio *bp;
softc = (struct pt_softc *)periph->softc;
CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ptstart\n"));
/*
* See if there is a buf with work for us to do..
*/
bp = bioq_first(&softc->bio_queue);
Merge CAM locking changes from the projects/camlock branch to radically reduce lock congestion and improve SMP scalability of the SCSI/ATA stack, preparing the ground for the coming next GEOM direct dispatch support. Replace big per-SIM locks with bunch of smaller ones: - per-LUN locks to protect device and peripheral drivers state; - per-target locks to protect list of LUNs on target; - per-bus locks to protect reference counting; - per-send queue locks to protect queue of CCBs to be sent; - per-done queue locks to protect queue of completed CCBs; - remaining per-SIM locks now protect only HBA driver internals. While holding LUN lock it is allowed (while not recommended for performance reasons) to take SIM lock. The opposite acquisition order is forbidden. All the other locks are leaf locks, that can be taken anywhere, but should not be cascaded. Many functions, such as: xpt_action(), xpt_done(), xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM lock to be held. To keep compatibility and solve cases where SIM lock can't be dropped, all xpt_async() calls in addition to xpt_done() calls are queued to completion threads for async processing in clean environment without SIM lock held. Instead of single CAM SWI thread, used for commands completion processing before, use multiple (depending on number of CPUs) threads. Load balanced between them using "hash" of the device B:T:L address. HBA drivers that can drop SIM lock during completion processing and have sufficient number of completion threads to efficiently scale to multiple CPUs can use new function xpt_done_direct() to avoid extra context switch. Make ahci(4) driver to use this mechanism depending on hardware setup. Sponsored by: iXsystems, Inc. MFC after: 2 months
2013-10-21 12:00:26 +00:00
if (bp == NULL) {
xpt_release_ccb(start_ccb);
} else {
bioq_remove(&softc->bio_queue, bp);
devstat_start_transaction_bio(softc->device_stats, bp);
scsi_send_receive(&start_ccb->csio,
/*retries*/4,
ptdone,
MSG_SIMPLE_Q_TAG,
bp->bio_cmd == BIO_READ,
/*byte2*/0,
bp->bio_bcount,
bp->bio_data,
/*sense_len*/SSD_FULL_SIZE,
/*timeout*/softc->io_timeout);
start_ccb->ccb_h.ccb_state = PT_CCB_BUFFER_IO_UA;
/*
* Block out any asynchronous callbacks
* while we touch the pending ccb list.
*/
LIST_INSERT_HEAD(&softc->pending_ccbs, &start_ccb->ccb_h,
periph_links.le);
start_ccb->ccb_h.ccb_bp = bp;
bp = bioq_first(&softc->bio_queue);
xpt_action(start_ccb);
if (bp != NULL) {
/* Have more work to do, so ensure we stay scheduled */
xpt_schedule(periph, CAM_PRIORITY_NORMAL);
}
}
}
static void
ptdone(struct cam_periph *periph, union ccb *done_ccb)
{
struct pt_softc *softc;
struct ccb_scsiio *csio;
softc = (struct pt_softc *)periph->softc;
CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ptdone\n"));
csio = &done_ccb->csio;
switch (csio->ccb_h.ccb_state) {
case PT_CCB_BUFFER_IO:
case PT_CCB_BUFFER_IO_UA:
{
struct bio *bp;
bp = (struct bio *)done_ccb->ccb_h.ccb_bp;
if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
int error;
int sf;
if ((csio->ccb_h.ccb_state & PT_CCB_RETRY_UA) != 0)
sf = SF_RETRY_UA;
else
sf = 0;
Rewrite of the CAM error recovery code. Some of the major changes include: - The SCSI error handling portion of cam_periph_error() has been broken out into a number of subfunctions to better modularize the code that handles the hierarchy of SCSI errors. As a result, the code is now much easier to read. - String handling and error printing has been significantly revamped. We now use sbufs to do string formatting instead of using printfs (for the kernel) and snprintf/strncat (for userland) as before. There is a new catchall error printing routine, cam_error_print() and its string-based counterpart, cam_error_string() that allow the kernel and userland applications to pass in a CCB and have errors printed out properly, whether or not they're SCSI errors. Among other things, this helped eliminate a fair amount of duplicate code in camcontrol. We now print out more information than before, including the CAM status and SCSI status and the error recovery action taken to remedy the problem. - sbufs are now available in userland, via libsbuf. This change was necessary since most of the error printing code is shared between libcam and the kernel. - A new transfer settings interface is included in this checkin. This code is #ifdef'ed out, and is primarily intended to aid discussion with HBA driver authors on the final form the interface should take. There is example code in the ahc(4) driver that implements the HBA driver side of the new interface. The new transfer settings code won't be enabled until we're ready to switch all HBA drivers over to the new interface. src/Makefile.inc1, lib/Makefile: Add libsbuf. It must be built before libcam, since libcam uses sbuf routines. libcam/Makefile: libcam now depends on libsbuf. libsbuf/Makefile: Add a makefile for libsbuf. This pulls in the sbuf sources from sys/kern. bsd.libnames.mk: Add LIBSBUF. camcontrol/Makefile: Add -lsbuf. Since camcontrol is statically linked, we can't depend on the dynamic linker to pull in libsbuf. camcontrol.c: Use cam_error_print() instead of checking for CAM_SCSI_STATUS_ERROR on every failed CCB. sbuf.9: Change the prototypes for sbuf_cat() and sbuf_cpy() so that the source string is now a const char *. This is more in line wth the standard system string functions, and helps eliminate warnings when dealing with a const source buffer. Fix a typo. cam.c: Add description strings for the various CAM error status values, as well as routines to look up those strings. Add new cam_error_string() and cam_error_print() routines for userland and the kernel. cam.h: Add a new CAM flag, CAM_RETRY_SELTO. Add enumerated types for the various options available with cam_error_print() and cam_error_string(). cam_ccb.h: Add new transfer negotiation structures/types. Change inq_len in the ccb_getdev structure to be "reserved". This field has never been filled in, and will be removed when we next bump the CAM version. cam_debug.h: Fix typo. cam_periph.c: Modularize cam_periph_error(). The SCSI error handling part of cam_periph_error() is now in camperiphscsistatuserror() and camperiphscsisenseerror(). In cam_periph_lock(), increase the reference count on the periph while we wait for our lock attempt to succeed so that the periph won't go away while we're sleeping. cam_xpt.c: Add new transfer negotiation code. (ifdefed out) Add a new function, xpt_path_string(). This is a string/sbuf analog to xpt_print_path(). scsi_all.c: Revamp string handing and error printing code. We now use sbufs for much of the string formatting code. More of that code is shared between userland the kernel. scsi_all.h: Get rid of SS_TURSTART, it wasn't terribly useful in the first place. Add a new error action, SS_REQSENSE. (Send a request sense and then retry the command.) This is useful when the controller hasn't performed autosense for some reason. Change the default actions around a bit. scsi_cd.c, scsi_da.c, scsi_pt.c, scsi_ses.c: SF_RETRY_SELTO -> CAM_RETRY_SELTO. Selection timeouts shouldn't be covered by a sense flag. scsi_pass.[ch]: SF_RETRY_SELTO -> CAM_RETRY_SELTO. Get rid of the last vestiges of a read/write interface. libkern/bsearch.c, sys/libkern.h, conf/files: Add bsearch.c, which is needed for some of the new table lookup routines. aic7xxx_freebsd.c: Define AHC_NEW_TRAN_SETTINGS if CAM_NEW_TRAN_CODE is defined. sbuf.h, subr_sbuf.c: Add the appropriate #ifdefs so sbufs can compile and run in userland. Change sbuf_printf() to use vsnprintf() instead of kvprintf(), which is only available in the kernel. Change the source string for sbuf_cpy() and sbuf_cat() to be a const char *. Add __BEGIN_DECLS and __END_DECLS around function prototypes since they're now exported to userland. kdump/mkioctls: Include stdio.h before cam.h since cam.h now includes a function with a FILE * argument. Submitted by: gibbs (mostly) Reviewed by: jdp, marcel (libsbuf makefile changes) Reviewed by: des (sbuf changes) Reviewed by: ken
2001-03-27 05:45:52 +00:00
error = pterror(done_ccb, CAM_RETRY_SELTO, sf);
if (error == ERESTART) {
/*
* A retry was scheuled, so
* just return.
*/
return;
}
if (error != 0) {
if (error == ENXIO) {
/*
* Catastrophic error. Mark our device
* as invalid.
*/
xpt_print(periph->path,
"Invalidating device\n");
softc->flags |= PT_FLAG_DEVICE_INVALID;
}
/*
* return all queued I/O with EIO, so that
* the client can retry these I/Os in the
* proper order should it attempt to recover.
*/
bioq_flush(&softc->bio_queue, NULL, EIO);
bp->bio_error = error;
bp->bio_resid = bp->bio_bcount;
bp->bio_flags |= BIO_ERROR;
} else {
bp->bio_resid = csio->resid;
bp->bio_error = 0;
if (bp->bio_resid != 0) {
/* Short transfer ??? */
bp->bio_flags |= BIO_ERROR;
}
}
if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(done_ccb->ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
/*timeout*/0,
/*getcount_only*/0);
} else {
bp->bio_resid = csio->resid;
if (bp->bio_resid != 0)
bp->bio_flags |= BIO_ERROR;
}
/*
* Block out any asynchronous callbacks
* while we touch the pending ccb list.
*/
LIST_REMOVE(&done_ccb->ccb_h, periph_links.le);
biofinish(bp, softc->device_stats, 0);
break;
}
}
xpt_release_ccb(done_ccb);
}
static int
pterror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
{
struct pt_softc *softc;
struct cam_periph *periph;
periph = xpt_path_periph(ccb->ccb_h.path);
softc = (struct pt_softc *)periph->softc;
return(cam_periph_error(ccb, cam_flags, sense_flags,
&softc->saved_ccb));
}
static int
ptioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
{
struct cam_periph *periph;
struct pt_softc *softc;
int error = 0;
periph = (struct cam_periph *)dev->si_drv1;
if (periph == NULL)
return(ENXIO);
softc = (struct pt_softc *)periph->softc;
cam_periph_lock(periph);
switch(cmd) {
case PTIOCGETTIMEOUT:
if (softc->io_timeout >= 1000)
*(int *)addr = softc->io_timeout / 1000;
else
*(int *)addr = 0;
break;
case PTIOCSETTIMEOUT:
if (*(int *)addr < 1) {
error = EINVAL;
break;
}
softc->io_timeout = *(int *)addr * 1000;
break;
default:
error = cam_periph_ioctl(periph, cmd, addr, pterror);
break;
}
cam_periph_unlock(periph);
return(error);
}
void
scsi_send_receive(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int tag_action, int readop, u_int byte2,
u_int32_t xfer_len, u_int8_t *data_ptr, u_int8_t sense_len,
u_int32_t timeout)
{
struct scsi_send_receive *scsi_cmd;
scsi_cmd = (struct scsi_send_receive *)&csio->cdb_io.cdb_bytes;
scsi_cmd->opcode = readop ? RECEIVE : SEND;
scsi_cmd->byte2 = byte2;
scsi_ulto3b(xfer_len, scsi_cmd->xfer_len);
scsi_cmd->control = 0;
cam_fill_csio(csio,
retries,
cbfcnp,
/*flags*/readop ? CAM_DIR_IN : CAM_DIR_OUT,
tag_action,
data_ptr,
xfer_len,
sense_len,
sizeof(*scsi_cmd),
timeout);
}