Clean up uio-related code to use struct nvme_request and
nvme_ctrlr_submit_io_request().

While here, also fix the case where a uio has more than one iovec.
NVMe's scatter/gather entries (called PRPs) only allow the first entry
to start on a non-page boundary, so a multi-iovec uio cannot, in
general, be described by a single PRP list. The simplest way to handle
this is to construct a temporary single-iovec uio for each iovec and
submit a separate NVMe request for each.

Sponsored by: Intel
commit 9becceac55
parent c9e224f9c9
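The PRP rule the commit message leans on is easy to demonstrate outside the kernel. The sketch below is an illustration only, not part of this change: it assumes a 4 KB page size and uses a made-up struct seg and prp_describable() helper to show why two arbitrary iovecs usually cannot be covered by one PRP list, while each iovec on its own can.

/*
 * Standalone illustration (not driver code) of the PRP constraint: in an
 * NVMe PRP list, only the first entry may begin at a non page-aligned
 * address, and only the last entry may end short of a page boundary.
 * Two arbitrary iovecs glued together will usually violate this, which
 * is why each iovec is submitted as its own command.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PRP_PAGE_SIZE	4096	/* assumed page size for the example */

struct seg {
	uintptr_t addr;
	size_t len;
};

/* Return true if the segment list could be expressed as one PRP list. */
static bool
prp_describable(const struct seg *segs, int nseg)
{
	int i;

	for (i = 0; i < nseg; i++) {
		/* Every segment but the first must start page-aligned. */
		if (i != 0 && (segs[i].addr & (PRP_PAGE_SIZE - 1)) != 0)
			return (false);
		/* Every segment but the last must end page-aligned. */
		if (i != nseg - 1 &&
		    ((segs[i].addr + segs[i].len) & (PRP_PAGE_SIZE - 1)) != 0)
			return (false);
	}
	return (true);
}

int
main(void)
{
	/* Two iovecs, each fine on its own, but not as one transfer. */
	struct seg iov[2] = {
		{ .addr = 0x10200, .len = 512 },	/* unaligned start */
		{ .addr = 0x20000, .len = 4096 },	/* page-aligned */
	};

	printf("combined: %s\n", prp_describable(iov, 2) ? "ok" : "must split");
	printf("iov[0] alone: %s\n", prp_describable(&iov[0], 1) ? "ok" : "must split");
	printf("iov[1] alone: %s\n", prp_describable(&iov[1], 1) ? "ok" : "must split");
	return (0);
}

This is the same reasoning behind the nvme_ns_physio() change further down, which walks the iovec array and issues one command per iovec.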
@@ -230,15 +230,11 @@ nvme_dump_completion(struct nvme_completion *cpl)
 void
 nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
-	struct nvme_tracker *tr;
-	struct nvme_qpair *qpair;
+	struct nvme_tracker *tr = arg;
 	uint32_t cur_nseg;
 
 	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
 
-	tr = (struct nvme_tracker *)arg;
-	qpair = tr->qpair;
-
 	/*
 	 * Note that we specified PAGE_SIZE for alignment and max
 	 * segment size when creating the bus dma tags. So here
@@ -259,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 		}
 	}
 
-	nvme_qpair_submit_cmd(qpair, tr);
+	nvme_qpair_submit_cmd(tr->qpair, tr);
 }
 
 static int
@@ -833,12 +833,21 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
 
 	tr->req = req;
 
-	if (req->payload_size > 0) {
-		err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map,
-		    req->payload, req->payload_size,
-		    nvme_payload_map, tr, 0);
+	if (req->uio == NULL) {
+		if (req->payload_size > 0) {
+			err = bus_dmamap_load(tr->qpair->dma_tag,
+			    tr->payload_dma_map, req->payload,
+			    req->payload_size,
+			    nvme_payload_map, tr, 0);
+			if (err != 0)
+				panic("bus_dmamap_load returned non-zero!\n");
+		} else
+			nvme_qpair_submit_cmd(tr->qpair, tr);
+	} else {
+		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
+		    tr->payload_dma_map, req->uio,
+		    nvme_payload_map_uio, tr, 0);
 		if (err != 0)
 			panic("bus_dmamap_load returned non-zero!\n");
-	} else
-		nvme_qpair_submit_cmd(tr->qpair, tr);
+	}
 }
@@ -101,6 +101,7 @@ struct nvme_request {
 	struct nvme_command cmd;
 	void *payload;
 	uint32_t payload_size;
+	struct uio *uio;
 	nvme_cb_fn_t cb_fn;
 	void *cb_arg;
 	SLIST_ENTRY(nvme_request) slist;
@@ -333,6 +334,8 @@ void nvme_ctrlr_cmd_asynchronous_event_request(struct nvme_controller *ctrlr,
 
 void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg,
     int error);
+void nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
+    bus_size_t mapsize, int error);
 
 int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 int nvme_ctrlr_reset(struct nvme_controller *ctrlr);
@@ -392,6 +395,22 @@ nvme_allocate_request(void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn,
 	return (req);
 }
 
+static __inline struct nvme_request *
+nvme_allocate_request_uio(struct uio *uio, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+
+	req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
+	if (req == NULL)
+		return (NULL);
+
+	req->uio = uio;
+	req->cb_fn = cb_fn;
+	req->cb_arg = cb_arg;
+
+	return (req);
+}
+
 #define nvme_free_request(req) uma_zfree(nvme_request_zone, req)
 
 #endif /* __NVME_PRIVATE_H__ */
@@ -163,7 +163,7 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 			/* nvme_qpair_submit_cmd() will release the lock. */
 			nvme_qpair_submit_cmd(qpair, tr);
 		else {
-			if (req->payload_size > 0)
+			if (req->payload_size > 0 || req->uio != NULL)
 				bus_dmamap_unload(qpair->dma_tag,
 				    tr->payload_dma_map);
 
@@ -38,8 +38,10 @@ static void
 nvme_uio_done(void *arg, const struct nvme_completion *status)
 {
 	struct mtx *mtx;
+	struct uio *uio = arg;
 
-	/* TODO: update uio flags based on status */
+	if (status->sf_sc == 0 && status->sf_sct == 0)
+		uio->uio_resid = 0;
 
 	mtx = mtx_pool_find(mtxpool_sleep, arg);
 	mtx_lock(mtx);
@@ -47,33 +49,17 @@ nvme_uio_done(void *arg, const struct nvme_completion *status)
 	mtx_unlock(mtx);
 }
 
-static struct nvme_tracker *
-nvme_allocate_tracker_uio(struct nvme_controller *ctrlr, struct uio *uio,
-    struct nvme_request *req)
-{
-	struct nvme_tracker *tr;
-	struct nvme_qpair *qpair;
-
-	if (ctrlr->per_cpu_io_queues)
-		qpair = &ctrlr->ioq[curcpu];
-	else
-		qpair = &ctrlr->ioq[0];
-
-	tr = nvme_qpair_allocate_tracker(qpair);
-
-	if (tr == NULL)
-		return (NULL);
-
-	tr->qpair = qpair;
-	tr->req = req;
-
-	return (tr);
-}
-
-static void
+void
 nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
     bus_size_t mapsize, int error)
 {
+	struct nvme_tracker *tr = arg;
+
+	/*
+	 * Now that we know the actual size of the uio, divide it by the
+	 * sector size that we stored in cdw12.
+	 */
+	tr->req->cmd.cdw12 = (mapsize / tr->req->cmd.cdw12)-1;
 	nvme_payload_map(arg, seg, nseg, error);
 }
 
@@ -81,20 +67,12 @@ static int
 nvme_read_uio(struct nvme_namespace *ns, struct uio *uio)
 {
 	struct nvme_request *req;
-	struct nvme_tracker *tr;
 	struct nvme_command *cmd;
-	int err, i;
-	uint64_t lba, iosize = 0;
+	uint64_t lba;
 
-	for (i = 0; i < uio->uio_iovcnt; i++) {
-		iosize += uio->uio_iov[i].iov_len;
-	}
+	req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
 
-	req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
-
-	tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
-
-	if (tr == NULL)
+	if (req == NULL)
 		return (ENOMEM);
 
 	cmd = &req->cmd;
@@ -103,13 +81,16 @@ nvme_read_uio(struct nvme_namespace *ns, struct uio *uio)
 	lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
 
 	*(uint64_t *)&cmd->cdw10 = lba;
+	/*
+	 * Store the sector size in cdw12 (where the LBA count normally goes).
+	 * We'll adjust cdw12 in the map_uio callback based on the mapsize
+	 * parameter. This allows us to not have to store the namespace
+	 * in the request simply to get the sector size in the map_uio
+	 * callback.
+	 */
+	cmd->cdw12 = nvme_ns_get_sector_size(ns);
 
-	cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
-	err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
-	    nvme_payload_map_uio, tr, 0);
-
-	KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+	nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 
 	return (0);
 }
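A quick worked example of the cdw12 bookkeeping in the hunk above (illustration only, with made-up numbers): the namespace sector size is parked in cdw12 when the command is built, and nvme_payload_map_uio() later replaces it with the zero-based LBA count derived from the mapped size.

/*
 * Illustration only: mimics the cdw12 handling shown above.  The real
 * values come from nvme_ns_get_sector_size() and the mapsize argument
 * passed to the bus_dmamap_load_uio() callback; these are assumptions.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t cdw12;
	uint32_t sector_size = 512;	/* assumed namespace sector size */
	uint64_t mapsize = 8192;	/* assumed mapped transfer size */

	/* At command-build time: stash the sector size in cdw12. */
	cdw12 = sector_size;

	/* In the map_uio callback: convert to a zero-based LBA count. */
	cdw12 = (mapsize / cdw12) - 1;

	printf("cdw12 = %u (i.e. %u sectors)\n", cdw12, cdw12 + 1);
	return (0);
}

For an 8 KB transfer on a 512-byte-sector namespace this yields cdw12 = 15, i.e. 16 sectors.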
@@ -118,20 +99,12 @@ static int
 nvme_write_uio(struct nvme_namespace *ns, struct uio *uio)
 {
 	struct nvme_request *req;
-	struct nvme_tracker *tr;
 	struct nvme_command *cmd;
-	int err, i;
-	uint64_t lba, iosize = 0;
+	uint64_t lba;
 
-	for (i = 0; i < uio->uio_iovcnt; i++) {
-		iosize += uio->uio_iov[i].iov_len;
-	}
+	req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
 
-	req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
-
-	tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
-
-	if (tr == NULL)
+	if (req == NULL)
 		return (ENOMEM);
 
 	cmd = &req->cmd;
@@ -140,13 +113,16 @@ nvme_write_uio(struct nvme_namespace *ns, struct uio *uio)
 	lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
 
 	*(uint64_t *)&cmd->cdw10 = lba;
+	/*
+	 * Store the sector size in cdw12 (where the LBA count normally goes).
+	 * We'll adjust cdw12 in the map_uio callback based on the mapsize
+	 * parameter. This allows us to not have to store the namespace
+	 * in the request simply to get the sector size in the map_uio
+	 * callback.
+	 */
+	cmd->cdw12 = nvme_ns_get_sector_size(ns);
 
-	cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
-	err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
-	    nvme_payload_map_uio, tr, 0);
-
-	KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+	nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 
 	return (0);
 }
@@ -154,9 +130,11 @@ nvme_write_uio(struct nvme_namespace *ns, struct uio *uio)
 int
 nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
 {
+	struct uio uio_tmp;
+	struct iovec uio_iov_tmp;
 	struct nvme_namespace *ns;
 	struct mtx *mtx;
-	int err;
+	int i, nvme_err, physio_err = 0;
 #if __FreeBSD_version > 900017
 	int ref;
 #endif
@@ -164,7 +142,7 @@ nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
 	PHOLD(curproc);
 
 	ns = dev->si_drv1;
-	mtx = mtx_pool_find(mtxpool_sleep, uio);
+	mtx = mtx_pool_find(mtxpool_sleep, &uio_tmp);
 
 #if __FreeBSD_version > 900017
 	dev_refthread(dev, &ref);
@@ -172,15 +150,48 @@ nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
 	dev_refthread(dev);
 #endif
 
-	mtx_lock(mtx);
-	if (uio->uio_rw == UIO_READ)
-		err = nvme_read_uio(ns, uio);
-	else
-		err = nvme_write_uio(ns, uio);
+	/*
+	 * NVM Express doesn't really support true SGLs. All SG elements
+	 * must be PAGE_SIZE, except for the first and last element.
+	 * Because of this, we need to break up each iovec into a separate
+	 * NVMe command - otherwise we could end up with sub-PAGE_SIZE
+	 * elements in the middle of an SGL which is not allowed.
+	 */
+	uio_tmp.uio_iov = &uio_iov_tmp;
+	uio_tmp.uio_iovcnt = 1;
+	uio_tmp.uio_offset = uio->uio_offset;
+	uio_tmp.uio_segflg = uio->uio_segflg;
+	uio_tmp.uio_rw = uio->uio_rw;
+	uio_tmp.uio_td = uio->uio_td;
 
-	if (err == 0)
-		msleep(uio, mtx, PRIBIO, "nvme_physio", 0);
-	mtx_unlock(mtx);
+	for (i = 0; i < uio->uio_iovcnt; i++) {
+
+		uio_iov_tmp.iov_base = uio->uio_iov[i].iov_base;
+		uio_iov_tmp.iov_len = uio->uio_iov[i].iov_len;
+		uio_tmp.uio_resid = uio_iov_tmp.iov_len;
+
+		mtx_lock(mtx);
+
+		if (uio->uio_rw == UIO_READ)
+			nvme_err = nvme_read_uio(ns, &uio_tmp);
+		else
+			nvme_err = nvme_write_uio(ns, &uio_tmp);
+
+		if (nvme_err == 0)
+			msleep(&uio_tmp, mtx, PRIBIO, "nvme_physio", 0);
+
+		mtx_unlock(mtx);
+
+		if (uio_tmp.uio_resid == 0) {
+			uio->uio_resid -= uio_iov_tmp.iov_len;
+			uio->uio_offset += uio_iov_tmp.iov_len;
+		} else {
+			physio_err = EFAULT;
+			break;
+		}
+
+		uio_tmp.uio_offset += uio_iov_tmp.iov_len;
+	}
 
 #if __FreeBSD_version > 900017
 	dev_relthread(dev, ref);
@@ -188,9 +199,6 @@ nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
 	dev_relthread(dev);
 #endif
 
-	if (err == 0)
-		uio->uio_resid = 0;
-
 	PRELE(curproc);
-	return (0);
+	return (physio_err);
 }