diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index 4b9b8e6c8829..6286c43f50eb 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -41,6 +41,7 @@ #include #include #include +#include /* General allocation length definitions for CCB structures */ #define IOCDBLEN CAM_MAX_CDBLEN /* Space for CDB bytes/pointer */ @@ -265,6 +266,7 @@ typedef enum { PROTO_ATAPI, /* AT Attachment Packetized Interface */ PROTO_SATAPM, /* SATA Port Multiplier */ PROTO_SEMB, /* SATA Enclosure Management Bridge */ + PROTO_NVME, /* NVME */ } cam_proto; typedef enum { @@ -280,6 +282,7 @@ typedef enum { XPORT_SATA, /* Serial AT Attachment */ XPORT_ISCSI, /* iSCSI */ XPORT_SRP, /* SCSI RDMA Protocol */ + XPORT_NVME, /* NVMe over PCIe */ } cam_xport; #define XPORT_IS_ATA(t) ((t) == XPORT_ATA || (t) == XPORT_SATA) @@ -782,6 +785,19 @@ struct ccb_relsim { u_int32_t qfrozen_cnt; }; +/* + * NVMe I/O Request CCB used for the XPT_NVME_IO function code. + */ +struct ccb_nvmeio { + struct ccb_hdr ccb_h; + union ccb *next_ccb; /* Ptr for next CCB for action */ + struct nvme_command cmd; /* NVME command, per NVME standard */ + struct nvme_completion cpl; /* NVME completion, per NVME standard */ + uint8_t *data_ptr; /* Ptr to the data buf/SG list */ + uint32_t dxfer_len; /* Data transfer length */ + uint32_t resid; /* Transfer residual length: 2's comp unused ?*/ +}; + /* * Definitions for the asynchronous callback CCB fields. 
*/ @@ -1234,6 +1250,7 @@ union ccb { struct ccb_ataio ataio; struct ccb_dev_advinfo cdai; struct ccb_async casync; + struct ccb_nvmeio nvmeio; }; #define CCB_CLEAR_ALL_EXCEPT_HDR(ccbp) \ @@ -1249,6 +1266,12 @@ cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries, u_int8_t sense_len, u_int8_t cdb_len, u_int32_t timeout); +static __inline void +cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, + u_int32_t timeout); + static __inline void cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), @@ -1370,6 +1393,20 @@ cam_ccb_status(union ccb *ccb) void cam_calc_geometry(struct ccb_calc_geometry *ccg, int extended); +static __inline void +cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, + u_int32_t timeout) +{ + nvmeio->ccb_h.func_code = XPT_NVME_IO; + nvmeio->ccb_h.flags = flags; + nvmeio->ccb_h.retry_count = retries; + nvmeio->ccb_h.cbfcnp = cbfcnp; + nvmeio->ccb_h.timeout = timeout; + nvmeio->data_ptr = data_ptr; + nvmeio->dxfer_len = dxfer_len; +} __END_DECLS #endif /* _CAM_CAM_CCB_H */ diff --git a/sys/cam/cam_xpt_internal.h b/sys/cam/cam_xpt_internal.h index 23d6d34cff79..b0624af88db5 100644 --- a/sys/cam/cam_xpt_internal.h +++ b/sys/cam/cam_xpt_internal.h @@ -117,6 +117,8 @@ struct cam_ed { STAILQ_ENTRY(cam_ed) highpowerq_entry; struct mtx device_mtx; struct task device_destroy_task; + const struct nvme_controller_data *nvme_cdata; + const struct nvme_namespace_data *nvme_data; }; /* @@ -167,6 +169,7 @@ struct cam_path { struct xpt_xport * scsi_get_xport(void); struct xpt_xport * ata_get_xport(void); +struct xpt_xport * nvme_get_xport(void); struct cam_ed * xpt_alloc_device(struct cam_eb *bus, struct cam_et *target, diff --git a/sys/cam/nvme/nvme_all.c 
b/sys/cam/nvme/nvme_all.c new file mode 100644 index 000000000000..3904891b3301 --- /dev/null +++ b/sys/cam/nvme/nvme_all.c @@ -0,0 +1,124 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+
+#ifdef _KERNEL
+#include "opt_scsi.h"
+
+#include
+#include
+#include
+#include
+#else
+#include
+#include
+#include
+#include
+#ifndef min
+#define min(a,b) (((a)<(b))?(a):(b))
+#endif
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Zero nvmeio->cmd, then set its opcode, namespace ID and command
+ * dwords 10 through 15 from the arguments.  No other field of the
+ * CCB is touched.
+ */
+void
+nvme_ns_cmd(struct ccb_nvmeio *nvmeio, uint8_t cmd, uint32_t nsid,
+    uint32_t cdw10, uint32_t cdw11, uint32_t cdw12, uint32_t cdw13,
+    uint32_t cdw14, uint32_t cdw15)
+{
+	bzero(&nvmeio->cmd, sizeof(struct nvme_command));
+	nvmeio->cmd.opc = cmd;
+	nvmeio->cmd.nsid = nsid;
+	nvmeio->cmd.cdw10 = cdw10;
+	nvmeio->cmd.cdw11 = cdw11;
+	nvmeio->cmd.cdw12 = cdw12;
+	nvmeio->cmd.cdw13 = cdw13;
+	nvmeio->cmd.cdw14 = cdw14;
+	nvmeio->cmd.cdw15 = cdw15;
+}
+
+/*
+ * Placeholder: both arguments are ignored and 0 is always returned.
+ */
+int
+nvme_identify_match(caddr_t identbuffer, caddr_t table_entry)
+{
+	return 0;
+}
+
+
+/*
+ * Placeholder: prints a fixed string; both arguments are ignored.
+ */
+void
+nvme_print_ident(const struct nvme_controller_data *cdata,
+    const struct nvme_namespace_data *data)
+{
+	printf("I'm a pretty NVME drive\n");
+}
+
+/* XXX need to do nvme admin opcodes too, but those aren't used yet by nda */
+/* Opcode names, indexed directly by the opcode value (0 through 8). */
+static const char *
+nvme_opc2str[] = {
+	"FLUSH",
+	"WRITE",
+	"READ",
+	"RSVD-3",
+	"WRITE_UNCORRECTABLE",
+	"COMPARE",
+	"RSVD-6",
+	"RSVD-7",
+	"DATASET_MANAGEMENT"
+};
+
+/*
+ * Return the name of cmd->opc from nvme_opc2str, or "UNKNOWN" when the
+ * opcode lies outside the table.
+ */
+const char *
+nvme_op_string(const struct nvme_command *cmd)
+{
+	/*
+	 * nitems() is one past the last valid index, so the bound check
+	 * must be >=; a plain > lets opc == nitems() read past the table.
+	 */
+	if (cmd->opc >= nitems(nvme_opc2str))
+		return "UNKNOWN";
+
+	return nvme_opc2str[cmd->opc];
+}
+
+const char *
+nvme_cmd_string(const struct nvme_command *cmd, char *cmd_string, size_t len)
+{
+	/*
+	 * cid, rsvd areas and mptr not printed, since they are used
+	 * only internally by the SIM.
+ */ + snprintf(cmd_string, len, + "opc=%x fuse=%x nsid=%x prp1=%llx prp2=%llx cdw=%x %x %x %x %x %x", + cmd->opc, cmd->fuse, cmd->nsid, + (unsigned long long)cmd->prp1, (unsigned long long)cmd->prp2, + cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14, cmd->cdw15); + + return cmd_string; +} diff --git a/sys/cam/nvme/nvme_all.h b/sys/cam/nvme/nvme_all.h new file mode 100644 index 000000000000..3cff74d32864 --- /dev/null +++ b/sys/cam/nvme/nvme_all.h @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef CAM_NVME_NVME_ALL_H +#define CAM_NVME_NVME_ALL_H 1 + +#include + +struct ccb_nvmeio; + +#define NVME_REV_1 1 /* Supports NVMe 1.2 or earlier */ + +void nvme_ns_cmd(struct ccb_nvmeio *nvmeio, uint8_t cmd, uint32_t nsid, + uint32_t cdw10, uint32_t cdw11, uint32_t cdw12, uint32_t cdw13, + uint32_t cdw14, uint32_t cdw15); + +int nvme_identify_match(caddr_t identbuffer, caddr_t table_entry); + +void nvme_print_ident(const struct nvme_controller_data *, const struct nvme_namespace_data *); +const char *nvme_op_string(const struct nvme_command *); +const char *nvme_cmd_string(const struct nvme_command *, char *, size_t); + +#endif /* CAM_NVME_NVME_ALL_H */ diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c new file mode 100644 index 000000000000..9628530edb20 --- /dev/null +++ b/sys/cam/nvme/nvme_da.c @@ -0,0 +1,1152 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Derived from ata_da.c: + * Copyright (c) 2009 Alexander Motin + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#ifdef _KERNEL +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* _KERNEL */ + +#ifndef _KERNEL +#include +#include +#endif /* _KERNEL */ + +#include +#include +#include +#include +#include +#include + +#include + +typedef enum { + NDA_STATE_NORMAL +} nda_state; + +typedef enum { + NDA_FLAG_OPEN = 0x0001, + NDA_FLAG_DIRTY = 0x0002, + NDA_FLAG_SCTX_INIT = 0x0004, +} nda_flags; + +typedef enum { + NDA_Q_4K = 0x01, + NDA_Q_NONE = 0x00, +} nda_quirks; + +#define NDA_Q_BIT_STRING \ + "\020" \ + "\001Bit 0" + +typedef enum { + NDA_CCB_BUFFER_IO = 0x01, + NDA_CCB_DUMP = 0x02, + NDA_CCB_TRIM = 0x03, + NDA_CCB_TYPE_MASK = 0x0F, +} nda_ccb_state; + +/* Offsets into our private area for storing information */ +#define ccb_state ppriv_field0 +#define ccb_bp ppriv_ptr1 + +struct trim_request { + TAILQ_HEAD(, bio) bps; +}; +struct nda_softc { + struct cam_iosched_softc *cam_iosched; + int outstanding_cmds; /* Number of active commands */ + int refcount; /* Active xpt_action() calls */ + nda_state state; + nda_flags flags; + nda_quirks quirks; + int unmappedio; + uint32_t nsid; /* Namespace ID for this nda device */ + struct disk *disk; + struct task sysctl_task; + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_tree; + struct 
trim_request trim_req; +#ifdef CAM_IO_STATS + struct sysctl_ctx_list sysctl_stats_ctx; + struct sysctl_oid *sysctl_stats_tree; + u_int timeouts; + u_int errors; + u_int invalidations; +#endif +}; + +/* Need quirk table */ + +static disk_strategy_t ndastrategy; +static dumper_t ndadump; +static periph_init_t ndainit; +static void ndaasync(void *callback_arg, u_int32_t code, + struct cam_path *path, void *arg); +static void ndasysctlinit(void *context, int pending); +static periph_ctor_t ndaregister; +static periph_dtor_t ndacleanup; +static periph_start_t ndastart; +static periph_oninv_t ndaoninvalidate; +static void ndadone(struct cam_periph *periph, + union ccb *done_ccb); +static int ndaerror(union ccb *ccb, u_int32_t cam_flags, + u_int32_t sense_flags); +static void ndashutdown(void *arg, int howto); +static void ndasuspend(void *arg); + +#ifndef NDA_DEFAULT_SEND_ORDERED +#define NDA_DEFAULT_SEND_ORDERED 1 +#endif +#ifndef NDA_DEFAULT_TIMEOUT +#define NDA_DEFAULT_TIMEOUT 30 /* Timeout in seconds */ +#endif +#ifndef NDA_DEFAULT_RETRY +#define NDA_DEFAULT_RETRY 4 +#endif + + +//static int nda_retry_count = NDA_DEFAULT_RETRY; +static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED; +static int nda_default_timeout = NDA_DEFAULT_TIMEOUT; + +/* + * All NVMe media is non-rotational, so all nvme device instances + * share this to implement the sysctl. + */ +static int nda_rotating_media = 0; + +static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0, + "CAM Direct Access Disk driver"); + +static struct periph_driver ndadriver = +{ + ndainit, "nda", + TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0 +}; + +PERIPHDRIVER_DECLARE(nda, ndadriver); + +static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers"); + +/* + * nice wrappers. Maybe these belong in nvme_all.c instead of + * here, but this is the only place that uses these. Should + * we ever grow another NVME periph, we should move them + * all there wholesale. 
+ */
+
+/*
+ * Fill nvmeio as a no-data request (CAM_DIR_NONE, NULL buffer) that
+ * completes through ndadone, then build the command with
+ * nvme_ns_flush_cmd() for this softc's namespace ID.
+ */
+static void
+nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
+{
+	cam_fill_nvmeio(nvmeio,
+	    0,			/* retries */
+	    ndadone,		/* cbfcnp */
+	    CAM_DIR_NONE,	/* flags */
+	    NULL,		/* data_ptr */
+	    0,			/* dxfer_len */
+	    nda_default_timeout * 1000); /* timeout: seconds * 1000 (default 30s) */
+	nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
+}
+
+/*
+ * Fill nvmeio as a data-out request whose buffer is payload and whose
+ * length is num_ranges entries of struct nvme_dsm_range, then build the
+ * command with nvme_ns_trim_cmd().
+ */
+static void
+nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
+    void *payload, uint32_t num_ranges)
+{
+	cam_fill_nvmeio(nvmeio,
+	    0,			/* retries */
+	    ndadone,		/* cbfcnp */
+	    CAM_DIR_OUT,	/* flags */
+	    payload,		/* data_ptr */
+	    num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
+	    nda_default_timeout * 1000); /* timeout: seconds * 1000 (default 30s) */
+	nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
+}
+
+/*
+ * Fill nvmeio as a data-out request of len bytes from payload, then
+ * build the command with nvme_ns_write_cmd() from lba and count.
+ * len is the transfer length handed to CAM; count is passed through to
+ * the command builder unchanged.
+ */
+static void
+nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
+    void *payload, uint64_t lba, uint32_t len, uint32_t count)
+{
+	cam_fill_nvmeio(nvmeio,
+	    0,			/* retries */
+	    ndadone,		/* cbfcnp */
+	    CAM_DIR_OUT,	/* flags */
+	    payload,		/* data_ptr */
+	    len,		/* dxfer_len */
+	    nda_default_timeout * 1000); /* timeout: seconds * 1000 (default 30s) */
+	nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
+}
+
+/*
+ * Fill nvmeio for a read or write described by bp.  rwcmd selects the
+ * data direction: NVME_OPC_READ is data-in, anything else is data-out.
+ * The starting block is bp->bio_pblkno and the block count is
+ * bio_bcount divided by the disk's sector size.
+ */
+static void
+nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
+    struct bio *bp, uint32_t rwcmd)
+{
+	int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
+	void *payload;
+	uint64_t lba;
+	uint32_t count;
+
+	/*
+	 * An unmapped bio has no usable bio_data pointer: hand CAM the
+	 * bio itself and mark the CCB with CAM_DATA_BIO.
+	 */
+	if (bp->bio_flags & BIO_UNMAPPED) {
+		flags |= CAM_DATA_BIO;
+		payload = bp;
+	} else {
+		payload = bp->bio_data;
+	}
+
+	lba = bp->bio_pblkno;
+	count = bp->bio_bcount / softc->disk->d_sectorsize;
+
+	cam_fill_nvmeio(nvmeio,
+	    0,			/* retries */
+	    ndadone,		/* cbfcnp */
+	    flags,		/* flags */
+	    payload,		/* data_ptr */
+	    bp->bio_bcount,	/* dxfer_len */
+	    nda_default_timeout * 1000); /* timeout: seconds * 1000 (default 30s) */
+	nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
+}
+
+/*
+ * Disk open entry point.  Takes a periph reference, which is kept on
+ * success, and sets NDA_FLAG_OPEN.  Returns ENXIO when the reference
+ * cannot be acquired, or the error from cam_periph_hold(), in which
+ * case the reference is dropped again.
+ */
+static int
+ndaopen(struct disk *dp)
+{
+	struct cam_periph *periph;
+	struct nda_softc *softc;
+	int error;
+
+	periph = (struct cam_periph *)dp->d_drv1;
+	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+		return(ENXIO);
+	}
+
+	cam_periph_lock(periph);
+	if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
+		cam_periph_unlock(periph);
+		cam_periph_release(periph);
+		return (error);
+	}
+
+	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
+	    ("ndaopen\n"));
+
+	softc = (struct nda_softc *)periph->softc;
+	softc->flags |= NDA_FLAG_OPEN;
+
+	cam_periph_unhold(periph);
+	cam_periph_unlock(periph);
+	return (0);
+}
+
+/*
+ * Disk close entry point.  When the device was written to
+ * (NDA_FLAG_DIRTY), is still valid and can be held, a flush is issued
+ * and the dirty flag is cleared if it succeeds.
+ */
+static int
+ndaclose(struct disk *dp)
+{
+	struct cam_periph *periph;
+	struct nda_softc *softc;
+	union ccb *ccb;
+	int error;
+
+	periph = (struct cam_periph *)dp->d_drv1;
+	softc = (struct nda_softc *)periph->softc;
+	cam_periph_lock(periph);
+
+	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
+	    ("ndaclose\n"));
+
+	if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
+	    (periph->flags & CAM_PERIPH_INVALID) == 0 &&
+	    cam_periph_hold(periph, PRIBIO) == 0) {
+
+		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
+		nda_nvme_flush(softc, &ccb->nvmeio);
+		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
+		    /*sense_flags*/0, softc->disk->d_devstat);
+
+		if (error != 0)
+			xpt_print(periph->path, "Synchronize cache failed\n");
+		else
+			softc->flags &= ~NDA_FLAG_DIRTY;
+
xpt_release_ccb(ccb); + cam_periph_unhold(periph); + } + + softc->flags &= ~NDA_FLAG_OPEN; + + while (softc->refcount != 0) + cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1); + cam_periph_unlock(periph); + cam_periph_release(periph); + return (0); +} + +static void +ndaschedule(struct cam_periph *periph) +{ + struct nda_softc *softc = (struct nda_softc *)periph->softc; + + if (softc->state != NDA_STATE_NORMAL) + return; + + cam_iosched_schedule(softc->cam_iosched, periph); +} + +/* + * Actually translate the requested transfer into one the physical driver + * can understand. The transfer is described by a buf and will include + * only one physical transfer. + */ +static void +ndastrategy(struct bio *bp) +{ + struct cam_periph *periph; + struct nda_softc *softc; + + periph = (struct cam_periph *)bp->bio_disk->d_drv1; + softc = (struct nda_softc *)periph->softc; + + cam_periph_lock(periph); + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp)); + + /* + * If the device has been made invalid, error out + */ + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_unlock(periph); + biofinish(bp, NULL, ENXIO); + return; + } + + /* + * Place it in the queue of disk activities for this disk + */ + cam_iosched_queue_work(softc->cam_iosched, bp); + + /* + * Schedule ourselves for performing the work. 
+ */ + ndaschedule(periph); + cam_periph_unlock(periph); + + return; +} + +static int +ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) +{ + struct cam_periph *periph; + struct nda_softc *softc; + u_int secsize; + union ccb ccb; + struct disk *dp; + uint64_t lba; + uint32_t count; + int error = 0; + + dp = arg; + periph = dp->d_drv1; + softc = (struct nda_softc *)periph->softc; + cam_periph_lock(periph); + secsize = softc->disk->d_sectorsize; + lba = offset / secsize; + count = length / secsize; + + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_unlock(periph); + return (ENXIO); + } + + if (length > 0) { + xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); + ccb.ccb_h.ccb_state = NDA_CCB_DUMP; + nda_nvme_write(softc, &ccb.nvmeio, virtual, lba, length, count); + xpt_polled_action(&ccb); + + error = cam_periph_error(&ccb, + 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); + if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, + /*reduction*/0, /*timeout*/0, /*getcount_only*/0); + if (error != 0) + printf("Aborting dump due to I/O error.\n"); + + cam_periph_unlock(periph); + return (error); + } + + /* Flush */ + xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); + + ccb.ccb_h.ccb_state = NDA_CCB_DUMP; + nda_nvme_flush(softc, &ccb.nvmeio); + xpt_polled_action(&ccb); + + error = cam_periph_error(&ccb, + 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); + if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, + /*reduction*/0, /*timeout*/0, /*getcount_only*/0); + if (error != 0) + xpt_print(periph->path, "flush cmd failed\n"); + cam_periph_unlock(periph); + return (error); +} + +static void +ndainit(void) +{ + cam_status status; + + /* + * Install a global async callback. This callback will + * receive async callbacks like "new device found". 
+ */ + status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL); + + if (status != CAM_REQ_CMP) { + printf("nda: Failed to attach master async callback " + "due to status 0x%x!\n", status); + } else if (nda_send_ordered) { + + /* Register our event handlers */ + if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend, + NULL, EVENTHANDLER_PRI_LAST)) == NULL) + printf("ndainit: power event registration failed!\n"); + if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("ndainit: shutdown event registration failed!\n"); + } +} + +/* + * Callback from GEOM, called when it has finished cleaning up its + * resources. + */ +static void +ndadiskgonecb(struct disk *dp) +{ + struct cam_periph *periph; + + periph = (struct cam_periph *)dp->d_drv1; + + cam_periph_release(periph); +} + +static void +ndaoninvalidate(struct cam_periph *periph) +{ + struct nda_softc *softc; + + softc = (struct nda_softc *)periph->softc; + + /* + * De-register any async callbacks. + */ + xpt_register_async(0, ndaasync, periph, periph->path); +#ifdef CAM_IO_STATS + softc->invalidations++; +#endif + + /* + * Return all queued I/O with ENXIO. + * XXX Handle any transactions queued to the card + * with XPT_ABORT_CCB. + */ + cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); + + disk_gone(softc->disk); +} + +static void +ndacleanup(struct cam_periph *periph) +{ + struct nda_softc *softc; + + softc = (struct nda_softc *)periph->softc; + + cam_periph_unlock(periph); + + cam_iosched_fini(softc->cam_iosched); + + /* + * If we can't free the sysctl tree, oh well... 
+ */ + if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) { +#ifdef CAM_IO_STATS + if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl stats context\n"); +#endif + if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl context\n"); + } + + disk_destroy(softc->disk); + free(softc, M_DEVBUF); + cam_periph_lock(periph); +} + +static void +ndaasync(void *callback_arg, u_int32_t code, + struct cam_path *path, void *arg) +{ + struct cam_periph *periph; + + periph = (struct cam_periph *)callback_arg; + switch (code) { + case AC_FOUND_DEVICE: + { + struct ccb_getdev *cgd; + cam_status status; + + cgd = (struct ccb_getdev *)arg; + if (cgd == NULL) + break; + + if (cgd->protocol != PROTO_NVME) + break; + + /* + * Allocate a peripheral instance for + * this device and start the probe + * process. + */ + status = cam_periph_alloc(ndaregister, ndaoninvalidate, + ndacleanup, ndastart, + "nda", CAM_PERIPH_BIO, + path, ndaasync, + AC_FOUND_DEVICE, cgd); + + if (status != CAM_REQ_CMP + && status != CAM_REQ_INPROG) + printf("ndaasync: Unable to attach to new device " + "due to status 0x%x\n", status); + break; + } + case AC_ADVINFO_CHANGED: + { + uintptr_t buftype; + + buftype = (uintptr_t)arg; + if (buftype == CDAI_TYPE_PHYS_PATH) { + struct nda_softc *softc; + + softc = periph->softc; + disk_attr_changed(softc->disk, "GEOM::physpath", + M_NOWAIT); + } + break; + } + case AC_LOST_DEVICE: + default: + cam_periph_async(periph, code, path, arg); + break; + } +} + +static void +ndasysctlinit(void *context, int pending) +{ + struct cam_periph *periph; + struct nda_softc *softc; + char tmpstr[80], tmpstr2[80]; + + periph = (struct cam_periph *)context; + + /* periph was held for us when this task was enqueued */ + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_release(periph); + return; + } + + softc = (struct nda_softc *)periph->softc; + snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", 
periph->unit_number); + snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); + + sysctl_ctx_init(&softc->sysctl_ctx); + softc->flags |= NDA_FLAG_SCTX_INIT; + softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, + SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2, + CTLFLAG_RD, 0, tmpstr); + if (softc->sysctl_tree == NULL) { + printf("ndasysctlinit: unable to allocate sysctl tree\n"); + cam_periph_release(periph); + return; + } + + SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->unmappedio, 0, "Unmapped I/O leaf"); + + SYSCTL_ADD_INT(&softc->sysctl_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, + "rotating", + CTLFLAG_RD | CTLFLAG_MPSAFE, + &nda_rotating_media, + 0, + "Rotating media"); + +#ifdef CAM_IO_STATS + softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", + CTLFLAG_RD, 0, "Statistics"); + if (softc->sysctl_stats_tree == NULL) { + printf("ndasysctlinit: unable to allocate sysctl tree for stats\n"); + cam_periph_release(periph); + return; + } + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->timeouts, 0, + "Device timeouts reported by the SIM"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->errors, 0, + "Transport errors reported by the SIM."); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->invalidations, 0, + "Device pack invalidations."); +#endif + + cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, + softc->sysctl_tree); + + cam_periph_release(periph); +} + +static int +ndagetattr(struct bio *bp) +{ + int ret; + struct cam_periph *periph; + 
+
+	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
+	cam_periph_lock(periph);
+	ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
+	    periph->path);
+	cam_periph_unlock(periph);
+	if (ret == 0)
+		bp->bio_completed = bp->bio_length;
+	return ret;
+}
+
+/*
+ * Periph constructor for an nda unit.  arg must be the struct
+ * ccb_getdev describing the device; its nvme_data / nvme_cdata pointers
+ * supply the namespace and controller identify data.  Returns
+ * CAM_REQ_CMP_ERR when arg is NULL or when the softc or the I/O
+ * scheduler state cannot be allocated.
+ */
+static cam_status
+ndaregister(struct cam_periph *periph, void *arg)
+{
+	struct nda_softc *softc;
+	struct disk *disk;
+	struct ccb_pathinq cpi;
+	struct ccb_getdev *cgd;
+	const struct nvme_namespace_data *nsd;
+	const struct nvme_controller_data *cd;
+	char	announce_buf[80];
+//	caddr_t match;
+	u_int maxio;
+	int quirks;
+
+	cgd = (struct ccb_getdev *)arg;
+	if (cgd == NULL) {
+		printf("ndaregister: no getdev CCB, can't register device\n");
+		return(CAM_REQ_CMP_ERR);
+	}
+	nsd = cgd->nvme_data;
+	cd = cgd->nvme_cdata;
+
+	softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+
+	if (softc == NULL) {
+		printf("ndaregister: Unable to probe new device. "
+		    "Unable to allocate softc\n");
+		return(CAM_REQ_CMP_ERR);
+	}
+
+	if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
+		printf("ndaregister: Unable to probe new device. "
+		    "Unable to allocate iosched memory\n");
+		/*
+		 * periph->softc has not been set yet, so nothing else
+		 * knows about this allocation; free it here or it leaks.
+		 */
+		free(softc, M_DEVBUF);
+		return(CAM_REQ_CMP_ERR);
+	}
+
+	/* ident_data parsing */
+
+	periph->softc = softc;
+
+#if 0
+	/*
+	 * See if this device has any quirks.
+	 */
+	match = cam_quirkmatch((caddr_t)&cgd->ident_data,
+	    (caddr_t)nda_quirk_table,
+	    sizeof(nda_quirk_table)/sizeof(*nda_quirk_table),
+	    sizeof(*nda_quirk_table), ata_identify_match);
+	if (match != NULL)
+		softc->quirks = ((struct nda_quirk_entry *)match)->quirks;
+	else
+#endif
+		softc->quirks = NDA_Q_NONE;
+
+	/* Ask the SIM for its path inquiry data (maxio, hba_* ids, ...). */
+	bzero(&cpi, sizeof(cpi));
+	xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE);
+	cpi.ccb_h.func_code = XPT_PATH_INQ;
+	xpt_action((union ccb *)&cpi);
+
+	TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);
+
+	/*
+	 * The name space ID is the lun, save it for later I/O.
+	 * softc->nsid is a uint32_t, so the cast must not narrow the
+	 * value to 16 bits: that would silently alias namespace IDs
+	 * above 0xffff onto lower ones.
+	 */
+	softc->nsid = (uint32_t)xpt_path_lun_id(periph->path);
+
+	/*
+	 * Register this media as a disk
+	 */
+	(void)cam_periph_hold(periph, PRIBIO);
+	cam_periph_unlock(periph);
+	/* Allow a per-unit loader tunable to override the quirk bits. */
+	snprintf(announce_buf, sizeof(announce_buf),
+	    "kern.cam.nda.%d.quirks", periph->unit_number);
+	quirks = softc->quirks;
+	TUNABLE_INT_FETCH(announce_buf, &quirks);
+	softc->quirks = quirks;
+	cam_iosched_set_sort_queue(softc->cam_iosched, 0);
+	softc->disk = disk = disk_alloc();
+	/*
+	 * d_descr and d_ident are filled in below by nvme_strvis() with
+	 * explicit source lengths.  Do not strlcpy() from cd->mn / cd->sn
+	 * here: those are fixed-width identify fields, presumably not
+	 * NUL-terminated, so strlcpy() would scan past the end of the
+	 * field looking for a terminator and drop the final character.
+	 */
+	disk->d_rotation_rate = 0;	/* Spinning rust need not apply */
+	disk->d_open = ndaopen;
+	disk->d_close = ndaclose;
+	disk->d_strategy = ndastrategy;
+	disk->d_getattr = ndagetattr;
+	disk->d_dump = ndadump;
+	disk->d_gone = ndadiskgonecb;
+	disk->d_name = "nda";
+	disk->d_drv1 = periph;
+	disk->d_unit = periph->unit_number;
+	maxio = cpi.maxio;		/* Honor max I/O size of SIM */
+	if (maxio == 0)
+		maxio = DFLTPHYS;	/* traditional default */
+	else if (maxio > MAXPHYS)
+		maxio = MAXPHYS;	/* for safety */
+	disk->d_maxsize = maxio;
+	/* Sector size is 2^lbads of the currently formatted LBA format. */
+	disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads;
+	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
+	disk->d_delmaxsize = disk->d_mediasize;
+	disk->d_flags =
DISKFLAG_DIRECT_COMPLETION; +// if (cd->oncs.dsm) // XXX broken? + disk->d_flags |= DISKFLAG_CANDELETE; + if (cd->vwc.present) + disk->d_flags |= DISKFLAG_CANFLUSHCACHE; + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { + disk->d_flags |= DISKFLAG_UNMAPPED_BIO; + softc->unmappedio = 1; + } + /* + * d_ident and d_descr are both far bigger than the length of either + * the serial or model number strings. + */ + nvme_strvis(disk->d_descr, cd->mn, + sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH); + nvme_strvis(disk->d_ident, cd->sn, + sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH); + disk->d_hba_vendor = cpi.hba_vendor; + disk->d_hba_device = cpi.hba_device; + disk->d_hba_subvendor = cpi.hba_subvendor; + disk->d_hba_subdevice = cpi.hba_subdevice; + disk->d_stripesize = disk->d_sectorsize; + disk->d_stripeoffset = 0; + disk->d_devstat = devstat_new_entry(periph->periph_name, + periph->unit_number, disk->d_sectorsize, + DEVSTAT_ALL_SUPPORTED, + DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), + DEVSTAT_PRIORITY_DISK); + + /* + * Acquire a reference to the periph before we register with GEOM. + * We'll release this reference once GEOM calls us back (via + * ndadiskgonecb()) telling us that our provider has been freed. + */ + if (cam_periph_acquire(periph) != CAM_REQ_CMP) { + xpt_print(periph->path, "%s: lost periph during " + "registration!\n", __func__); + cam_periph_lock(periph); + return (CAM_REQ_CMP_ERR); + } + disk_create(softc->disk, DISK_VERSION); + cam_periph_lock(periph); + cam_periph_unhold(periph); + + snprintf(announce_buf, sizeof(announce_buf), + "%juMB (%ju %u byte sectors)", + (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)), + (uintmax_t)disk->d_mediasize / disk->d_sectorsize, + disk->d_sectorsize); + xpt_announce_periph(periph, announce_buf); + xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING); + + /* + * Create our sysctl variables, now that we know + * we have successfully attached. 
+ */ + if (cam_periph_acquire(periph) == CAM_REQ_CMP) + taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); + + /* + * Register for device going away and info about the drive + * changing (though with NVMe, it can't) + */ + xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, + ndaasync, periph, periph->path); + + softc->state = NDA_STATE_NORMAL; + return(CAM_REQ_CMP); +} + +static void +ndastart(struct cam_periph *periph, union ccb *start_ccb) +{ + struct nda_softc *softc = (struct nda_softc *)periph->softc; + struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio; + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n")); + + switch (softc->state) { + case NDA_STATE_NORMAL: + { + struct bio *bp; + + bp = cam_iosched_next_bio(softc->cam_iosched); + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp)); + if (bp == NULL) { + xpt_release_ccb(start_ccb); + break; + } + + switch (bp->bio_cmd) { + case BIO_WRITE: + softc->flags |= NDA_FLAG_DIRTY; + /* FALLTHROUGH */ + case BIO_READ: + { +#ifdef NDA_TEST_FAILURE + int fail = 0; + + /* + * Support the failure ioctls. If the command is a + * read, and there are pending forced read errors, or + * if a write and pending write errors, then fail this + * operation with EIO. This is useful for testing + * purposes. Also, support having every Nth read fail. + * + * This is a rather blunt tool. 
+ */ + if (bp->bio_cmd == BIO_READ) { + if (softc->force_read_error) { + softc->force_read_error--; + fail = 1; + } + if (softc->periodic_read_error > 0) { + if (++softc->periodic_read_count >= + softc->periodic_read_error) { + softc->periodic_read_count = 0; + fail = 1; + } + } + } else { + if (softc->force_write_error) { + softc->force_write_error--; + fail = 1; + } + } + if (fail) { + biofinish(bp, NULL, EIO); + xpt_release_ccb(start_ccb); + ndaschedule(periph); + return; + } +#endif + KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || + round_page(bp->bio_bcount + bp->bio_ma_offset) / + PAGE_SIZE == bp->bio_ma_n, + ("Short bio %p", bp)); + nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ? + NVME_OPC_READ : NVME_OPC_WRITE); + break; + } + case BIO_DELETE: + { + struct nvme_dsm_range *dsm_range; + + dsm_range = + malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_WAITOK); + dsm_range->length = + bp->bio_bcount / softc->disk->d_sectorsize; + dsm_range->starting_lba = + bp->bio_offset / softc->disk->d_sectorsize; + bp->bio_driver2 = dsm_range; + nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1); + start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM; + start_ccb->ccb_h.flags |= CAM_UNLOCKED; + cam_iosched_submit_trim(softc->cam_iosched); /* XXX */ + goto out; + } + case BIO_FLUSH: + nda_nvme_flush(softc, nvmeio); + break; + } + start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO; + start_ccb->ccb_h.flags |= CAM_UNLOCKED; +out: + start_ccb->ccb_h.ccb_bp = bp; + softc->outstanding_cmds++; + softc->refcount++; + cam_periph_unlock(periph); + xpt_action(start_ccb); + cam_periph_lock(periph); + softc->refcount--; + + /* May have more work to do, so ensure we stay scheduled */ + ndaschedule(periph); + break; + } + } +} + +static void +ndadone(struct cam_periph *periph, union ccb *done_ccb) +{ + struct nda_softc *softc; + struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio; + struct cam_path *path; + int state; + + softc = (struct nda_softc *)periph->softc; + path = 
done_ccb->ccb_h.path; + + CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n")); + + state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK; + switch (state) { + case NDA_CCB_BUFFER_IO: + case NDA_CCB_TRIM: + { + struct bio *bp; + int error; + + cam_periph_lock(periph); + bp = (struct bio *)done_ccb->ccb_h.ccb_bp; + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + error = ndaerror(done_ccb, 0, 0); + if (error == ERESTART) { + /* A retry was scheduled, so just return. */ + cam_periph_unlock(periph); + return; + } + if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(path, + /*relsim_flags*/0, + /*reduction*/0, + /*timeout*/0, + /*getcount_only*/0); + } else { + if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) + panic("REQ_CMP with QFRZN"); + error = 0; + } + bp->bio_error = error; + if (error != 0) { + bp->bio_resid = bp->bio_bcount; + bp->bio_flags |= BIO_ERROR; + } else { + if (state == NDA_CCB_TRIM) + bp->bio_resid = 0; + else + bp->bio_resid = nvmeio->resid; + if (bp->bio_resid > 0) + bp->bio_flags |= BIO_ERROR; + } + if (state == NDA_CCB_TRIM) + free(bp->bio_driver2, M_NVMEDA); + softc->outstanding_cmds--; + + cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); + xpt_release_ccb(done_ccb); + if (state == NDA_CCB_TRIM) { +#ifdef notyet + TAILQ_HEAD(, bio) queue; + struct bio *bp1; + + TAILQ_INIT(&queue); + TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue); +#endif + cam_iosched_trim_done(softc->cam_iosched); + ndaschedule(periph); + cam_periph_unlock(periph); +#ifdef notyet +/* Not yet collapsing several BIO_DELETE requests into one TRIM */ + while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, bp1, bio_queue); + bp1->bio_error = error; + if (error != 0) { + bp1->bio_flags |= BIO_ERROR; + bp1->bio_resid = bp1->bio_bcount; + } else + bp1->bio_resid = 0; + biodone(bp1); + } +#else + biodone(bp); +#endif + } else { + ndaschedule(periph); + cam_periph_unlock(periph); + biodone(bp); + } + return; + } + case 
NDA_CCB_DUMP: + /* No-op. We're polling */ + return; + default: + break; + } + xpt_release_ccb(done_ccb); +} + +static int +ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) +{ + struct nda_softc *softc; + struct cam_periph *periph; + + periph = xpt_path_periph(ccb->ccb_h.path); + softc = (struct nda_softc *)periph->softc; + + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: +#ifdef CAM_IO_STATS + softc->timeouts++; +#endif + break; + case CAM_REQ_ABORTED: + case CAM_REQ_CMP_ERR: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_DATA_RUN_ERR: + case CAM_ATA_STATUS_ERROR: +#ifdef CAM_IO_STATS + softc->errors++; +#endif + break; + default: + break; + } + + return(cam_periph_error(ccb, cam_flags, sense_flags, NULL)); +} + +/* + * Step through all NDA peripheral drivers, and if the device is still open, + * sync the disk cache to physical media. + */ +static void +ndaflush(void) +{ + struct cam_periph *periph; + struct nda_softc *softc; + union ccb *ccb; + int error; + + CAM_PERIPH_FOREACH(periph, &ndadriver) { + softc = (struct nda_softc *)periph->softc; + if (SCHEDULER_STOPPED()) { + /* If we paniced with the lock held, do not recurse. */ + if (!cam_periph_owned(periph) && + (softc->flags & NDA_FLAG_OPEN)) { + ndadump(softc->disk, NULL, 0, 0, 0); + } + continue; + } + cam_periph_lock(periph); + /* + * We only sync the cache if the drive is still open, and + * if the drive is capable of it.. 
+ */ + if ((softc->flags & NDA_FLAG_OPEN) == 0) { + cam_periph_unlock(periph); + continue; + } + + ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); + nda_nvme_flush(softc, &ccb->nvmeio); + error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, + /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, + softc->disk->d_devstat); + if (error != 0) + xpt_print(periph->path, "Synchronize cache failed\n"); + xpt_release_ccb(ccb); + cam_periph_unlock(periph); + } +} + +static void +ndashutdown(void *arg, int howto) +{ + + ndaflush(); +} + +static void +ndasuspend(void *arg) +{ + + ndaflush(); +} diff --git a/sys/cam/nvme/nvme_xpt.c b/sys/cam/nvme/nvme_xpt.c new file mode 100644 index 000000000000..9139e29f7bff --- /dev/null +++ b/sys/cam/nvme/nvme_xpt.c @@ -0,0 +1,605 @@ +/*- + * Copyright (c) 2015 Netflix, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * derived from ata_xpt.c: Copyright (c) 2009 Alexander Motin + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include /* for xpt_print below */ +#include "opt_cam.h" + +struct nvme_quirk_entry { + u_int quirks; +#define CAM_QUIRK_MAXTAGS 1 + u_int mintags; + u_int maxtags; +}; + +/* Not even sure why we need this */ +static periph_init_t nvme_probe_periph_init; + +static struct periph_driver nvme_probe_driver = +{ + nvme_probe_periph_init, "nvme_probe", + TAILQ_HEAD_INITIALIZER(nvme_probe_driver.units), /* generation */ 0, + CAM_PERIPH_DRV_EARLY +}; + +PERIPHDRIVER_DECLARE(nvme_probe, nvme_probe_driver); + +typedef enum { + NVME_PROBE_IDENTIFY, + NVME_PROBE_DONE, + NVME_PROBE_INVALID, + NVME_PROBE_RESET +} nvme_probe_action; + +static char *nvme_probe_action_text[] = { + "NVME_PROBE_IDENTIFY", + "NVME_PROBE_DONE", + "NVME_PROBE_INVALID", + "NVME_PROBE_RESET", +}; + +#define NVME_PROBE_SET_ACTION(softc, newaction) \ +do { \ + char **text; \ + text = nvme_probe_action_text; \ + CAM_DEBUG((softc)->periph->path, CAM_DEBUG_PROBE, \ + ("Probe %s to %s\n", text[(softc)->action], \ + text[(newaction)])); \ + (softc)->action = (newaction); \ +} while(0) + +typedef enum { + 
NVME_PROBE_NO_ANNOUNCE = 0x04 +} nvme_probe_flags; + +typedef struct { + TAILQ_HEAD(, ccb_hdr) request_ccbs; + nvme_probe_action action; + nvme_probe_flags flags; + int restart; + struct cam_periph *periph; +} nvme_probe_softc; + +static struct nvme_quirk_entry nvme_quirk_table[] = +{ + { +// { +// T_ANY, SIP_MEDIA_REMOVABLE|SIP_MEDIA_FIXED, +// /*vendor*/"*", /*product*/"*", /*revision*/"*" +// }, + .quirks = 0, .mintags = 0, .maxtags = 0 + }, +}; + +static const int nvme_quirk_table_size = + sizeof(nvme_quirk_table) / sizeof(*nvme_quirk_table); + +static cam_status nvme_probe_register(struct cam_periph *periph, + void *arg); +static void nvme_probe_schedule(struct cam_periph *nvme_probe_periph); +static void nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb); +static void nvme_probe_cleanup(struct cam_periph *periph); +static void nvme_find_quirk(struct cam_ed *device); +static void nvme_scan_lun(struct cam_periph *periph, + struct cam_path *path, cam_flags flags, + union ccb *ccb); +static struct cam_ed * + nvme_alloc_device(struct cam_eb *bus, struct cam_et *target, + lun_id_t lun_id); +static void nvme_device_transport(struct cam_path *path); +static void nvme_dev_async(u_int32_t async_code, + struct cam_eb *bus, + struct cam_et *target, + struct cam_ed *device, + void *async_arg); +static void nvme_action(union ccb *start_ccb); +static void nvme_announce_periph(struct cam_periph *periph); + +static struct xpt_xport nvme_xport = { + .alloc_device = nvme_alloc_device, + .action = nvme_action, + .async = nvme_dev_async, + .announce = nvme_announce_periph, +}; + +struct xpt_xport * +nvme_get_xport(void) +{ + return (&nvme_xport); +} + +static void +nvme_probe_periph_init() +{ + printf("nvme cam probe device init\n"); +} + +static cam_status +nvme_probe_register(struct cam_periph *periph, void *arg) +{ + union ccb *request_ccb; /* CCB representing the probe request */ + cam_status status; + nvme_probe_softc *softc; + + request_ccb = (union ccb 
*)arg; + if (request_ccb == NULL) { + printf("nvme_probe_register: no probe CCB, " + "can't register device\n"); + return(CAM_REQ_CMP_ERR); + } + + softc = (nvme_probe_softc *)malloc(sizeof(*softc), M_CAMXPT, M_ZERO | M_NOWAIT); + + if (softc == NULL) { + printf("nvme_probe_register: Unable to probe new device. " + "Unable to allocate softc\n"); + return(CAM_REQ_CMP_ERR); + } + TAILQ_INIT(&softc->request_ccbs); + TAILQ_INSERT_TAIL(&softc->request_ccbs, &request_ccb->ccb_h, + periph_links.tqe); + softc->flags = 0; + periph->softc = softc; + softc->periph = periph; + softc->action = NVME_PROBE_INVALID; + status = cam_periph_acquire(periph); + if (status != CAM_REQ_CMP) { + return (status); + } + CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe started\n")); + +// nvme_device_transport(periph->path); + nvme_probe_schedule(periph); + + return(CAM_REQ_CMP); +} + +static void +nvme_probe_schedule(struct cam_periph *periph) +{ + union ccb *ccb; + nvme_probe_softc *softc; + + softc = (nvme_probe_softc *)periph->softc; + ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs); + + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY); + + if (ccb->crcn.flags & CAM_EXPECT_INQ_CHANGE) + softc->flags |= NVME_PROBE_NO_ANNOUNCE; + else + softc->flags &= ~NVME_PROBE_NO_ANNOUNCE; + + xpt_schedule(periph, CAM_PRIORITY_XPT); +} + +static void +nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb) +{ + struct ccb_nvmeio *nvmeio; + struct ccb_scsiio *csio; + nvme_probe_softc *softc; + struct cam_path *path; + const struct nvme_namespace_data *nvme_data; + lun_id_t lun; + + CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("nvme_probe_start\n")); + + softc = (nvme_probe_softc *)periph->softc; + path = start_ccb->ccb_h.path; + nvmeio = &start_ccb->nvmeio; + csio = &start_ccb->csio; + nvme_data = periph->path->device->nvme_data; + + if (softc->restart) { + softc->restart = 0; + if (periph->path->device->flags & CAM_DEV_UNCONFIGURED) + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_RESET); 
+ else + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY); + } + + /* + * Other transports have to ask their SIM to do a lot of action. + * NVMe doesn't, so don't do the dance. Just do things + * directly. + */ + switch (softc->action) { + case NVME_PROBE_RESET: + /* FALLTHROUGH */ + case NVME_PROBE_IDENTIFY: + nvme_device_transport(path); + /* + * Test for lun == CAM_LUN_WILDCARD is lame, but + * appears to be necessary here. XXX + */ + lun = xpt_path_lun_id(periph->path); + if (lun == CAM_LUN_WILDCARD || + periph->path->device->flags & CAM_DEV_UNCONFIGURED) { + path->device->flags &= ~CAM_DEV_UNCONFIGURED; + xpt_acquire_device(path->device); + start_ccb->ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action(start_ccb); + xpt_async(AC_FOUND_DEVICE, path, start_ccb); + } + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_DONE); + break; + default: + panic("nvme_probe_start: invalid action state 0x%x\n", softc->action); + } + /* + * Probing is now done. We need to complete any lingering items + * in the queue, though there shouldn't be any. + */ + xpt_release_ccb(start_ccb); + CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe completed\n")); + while ((start_ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs))) { + TAILQ_REMOVE(&softc->request_ccbs, + &start_ccb->ccb_h, periph_links.tqe); + start_ccb->ccb_h.status = CAM_REQ_CMP; + xpt_done(start_ccb); + } +// XXX not sure I need this +// XXX unlike other XPTs, we never freeze the queue since we have a super-simple +// XXX state machine + /* Drop freeze taken due to CAM_DEV_QFREEZE flag set. -- did we really do this? 
*/ +// cam_release_devq(path, 0, 0, 0, FALSE); + cam_periph_invalidate(periph); + cam_periph_release_locked(periph); +} + +static void +nvme_probe_cleanup(struct cam_periph *periph) +{ + free(periph->softc, M_CAMXPT); +} + +/* XXX should be used, don't delete */ +static void +nvme_find_quirk(struct cam_ed *device) +{ + struct nvme_quirk_entry *quirk; + caddr_t match; + + match = cam_quirkmatch((caddr_t)&device->nvme_data, + (caddr_t)nvme_quirk_table, + nvme_quirk_table_size, + sizeof(*nvme_quirk_table), nvme_identify_match); + + if (match == NULL) + panic("xpt_find_quirk: device didn't match wildcard entry!!"); + + quirk = (struct nvme_quirk_entry *)match; + device->quirk = quirk; + if (quirk->quirks & CAM_QUIRK_MAXTAGS) { + device->mintags = quirk->mintags; + device->maxtags = quirk->maxtags; + } +} + +static void +nvme_scan_lun(struct cam_periph *periph, struct cam_path *path, + cam_flags flags, union ccb *request_ccb) +{ + struct ccb_pathinq cpi; + cam_status status; + struct cam_periph *old_periph; + int lock; + + CAM_DEBUG(path, CAM_DEBUG_TRACE, ("nvme_scan_lun\n")); + + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + + if (cpi.ccb_h.status != CAM_REQ_CMP) { + if (request_ccb != NULL) { + request_ccb->ccb_h.status = cpi.ccb_h.status; + xpt_done(request_ccb); + } + return; + } + + if (xpt_path_lun_id(path) == CAM_LUN_WILDCARD) { + CAM_DEBUG(path, CAM_DEBUG_TRACE, ("nvme_scan_lun ignoring bus\n")); + request_ccb->ccb_h.status = CAM_REQ_CMP; /* XXX signal error ? 
*/ + xpt_done(request_ccb); + return; + } + + lock = (xpt_path_owned(path) == 0); + if (lock) + xpt_path_lock(path); + if ((old_periph = cam_periph_find(path, "nvme_probe")) != NULL) { + if ((old_periph->flags & CAM_PERIPH_INVALID) == 0) { + nvme_probe_softc *softc; + + softc = (nvme_probe_softc *)old_periph->softc; + TAILQ_INSERT_TAIL(&softc->request_ccbs, + &request_ccb->ccb_h, periph_links.tqe); + softc->restart = 1; + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("restarting nvme_probe device\n")); + } else { + request_ccb->ccb_h.status = CAM_REQ_CMP_ERR; + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("Failing to restart nvme_probe device\n")); + xpt_done(request_ccb); + } + } else { + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("Adding nvme_probe device\n")); + status = cam_periph_alloc(nvme_probe_register, NULL, nvme_probe_cleanup, + nvme_probe_start, "nvme_probe", + CAM_PERIPH_BIO, + request_ccb->ccb_h.path, NULL, 0, + request_ccb); + + if (status != CAM_REQ_CMP) { + xpt_print(path, "xpt_scan_lun: cam_alloc_periph " + "returned an error, can't continue probe\n"); + request_ccb->ccb_h.status = status; + xpt_done(request_ccb); + } + } + if (lock) + xpt_path_unlock(path); +} + +static struct cam_ed * +nvme_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id) +{ + struct nvme_quirk_entry *quirk; + struct cam_ed *device; + + device = xpt_alloc_device(bus, target, lun_id); + if (device == NULL) + return (NULL); + + /* + * Take the default quirk entry until we have inquiry + * data from nvme and can determine a better quirk to use. 
+ */ + quirk = &nvme_quirk_table[nvme_quirk_table_size - 1]; + device->quirk = (void *)quirk; + device->mintags = 0; + device->maxtags = 0; + device->inq_flags = 0; + device->queue_flags = 0; + device->device_id = NULL; /* XXX Need to set this somewhere */ + device->device_id_len = 0; + device->serial_num = NULL; /* XXX Need to set this somewhere */ + device->serial_num_len = 0; + return (device); +} + +static void +nvme_device_transport(struct cam_path *path) +{ + struct ccb_pathinq cpi; + struct ccb_trans_settings cts; + /* XXX get data from nvme namespace and other info ??? */ + + /* Get transport information from the SIM */ + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + + path->device->transport = cpi.transport; + path->device->transport_version = cpi.transport_version; + + path->device->protocol = cpi.protocol; + path->device->protocol_version = cpi.protocol_version; + + /* Tell the controller what we think */ + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + cts.transport = path->device->transport; + cts.transport_version = path->device->transport_version; + cts.protocol = path->device->protocol; + cts.protocol_version = path->device->protocol_version; + cts.proto_specific.valid = 0; + cts.xport_specific.valid = 0; + xpt_action((union ccb *)&cts); +} + +static void +nvme_dev_advinfo(union ccb *start_ccb) +{ + struct cam_ed *device; + struct ccb_dev_advinfo *cdai; + off_t amt; + + start_ccb->ccb_h.status = CAM_REQ_INVALID; + device = start_ccb->ccb_h.path->device; + cdai = &start_ccb->cdai; + switch(cdai->buftype) { + case CDAI_TYPE_SCSI_DEVID: + if (cdai->flags & CDAI_FLAG_STORE) + return; + cdai->provsiz = device->device_id_len; + if (device->device_id_len == 0) + break; + amt = device->device_id_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, 
device->device_id, amt); + break; + case CDAI_TYPE_SERIAL_NUM: + if (cdai->flags & CDAI_FLAG_STORE) + return; + cdai->provsiz = device->serial_num_len; + if (device->serial_num_len == 0) + break; + amt = device->serial_num_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->serial_num, amt); + break; + case CDAI_TYPE_PHYS_PATH: + if (cdai->flags & CDAI_FLAG_STORE) { + if (device->physpath != NULL) + free(device->physpath, M_CAMXPT); + device->physpath_len = cdai->bufsiz; + /* Clear existing buffer if zero length */ + if (cdai->bufsiz == 0) + break; + device->physpath = malloc(cdai->bufsiz, M_CAMXPT, M_NOWAIT); + if (device->physpath == NULL) { + start_ccb->ccb_h.status = CAM_REQ_ABORTED; + return; + } + memcpy(device->physpath, cdai->buf, cdai->bufsiz); + } else { + cdai->provsiz = device->physpath_len; + if (device->physpath_len == 0) + break; + amt = device->physpath_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->physpath, amt); + } + break; + default: + return; + } + start_ccb->ccb_h.status = CAM_REQ_CMP; + + if (cdai->flags & CDAI_FLAG_STORE) { + xpt_async(AC_ADVINFO_CHANGED, start_ccb->ccb_h.path, + (void *)(uintptr_t)cdai->buftype); + } +} + +static void +nvme_action(union ccb *start_ccb) +{ + CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("nvme_action: func= %#x\n", start_ccb->ccb_h.func_code)); + + switch (start_ccb->ccb_h.func_code) { + case XPT_SCAN_BUS: + printf("NVME scan BUS started -- ignored\n"); +// break; + case XPT_SCAN_TGT: + printf("NVME scan TGT started -- ignored\n"); +// break; + case XPT_SCAN_LUN: + printf("NVME scan started\n"); + nvme_scan_lun(start_ccb->ccb_h.path->periph, + start_ccb->ccb_h.path, start_ccb->crcn.flags, + start_ccb); + break; + case XPT_DEV_ADVINFO: + nvme_dev_advinfo(start_ccb); + break; + + default: + xpt_action_default(start_ccb); + break; + } +} + +/* + * Handle any per-device event notifications that require action by the XPT. 
+ */ +static void +nvme_dev_async(u_int32_t async_code, struct cam_eb *bus, struct cam_et *target, + struct cam_ed *device, void *async_arg) +{ + + /* + * We only need to handle events for real devices. + */ + if (target->target_id == CAM_TARGET_WILDCARD + || device->lun_id == CAM_LUN_WILDCARD) + return; + + if (async_code == AC_LOST_DEVICE && + (device->flags & CAM_DEV_UNCONFIGURED) == 0) { + device->flags |= CAM_DEV_UNCONFIGURED; + xpt_release_device(device); + } +} + +static void +nvme_announce_periph(struct cam_periph *periph) +{ + struct ccb_pathinq cpi; + struct ccb_trans_settings cts; + struct cam_path *path = periph->path; + + cam_periph_assert(periph, MA_OWNED); + + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NORMAL); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + xpt_action((union ccb*)&cts); + if ((cts.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) + return; + /* Ask the SIM for its base transfer speed */ + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NORMAL); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + /* XXX NVME STUFF HERE */ + printf("\n"); +} diff --git a/sys/conf/files b/sys/conf/files index a364a622ec15..49dca44063de 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -86,6 +86,9 @@ cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus +cam/nvme/nvme_all.c optional scbus nvme +cam/nvme/nvme_da.c optional scbus nvme da +cam/nvme/nvme_xpt.c optional scbus nvme cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 53c299f15536..4949d7152bd5 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -290,6 +290,7 @@ dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme +dev/nvme/nvme_sim.c optional nvme 
scbus !nvd dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme