lib/idxd: rotate portal offset with each submission
Allows for better performance by not hitting the same portal address with every submission. Signed-off-by: paul luse <paul.e.luse@intel.com> Signed-off-by: Ziye Yang <ziye.yang@intel.com> Change-Id: I1ec8eae6f3acec9e98161029cd5406ec08603aa6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8190 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
320ab72fb5
commit
b4f302e525
@ -47,7 +47,6 @@
|
||||
#define ALIGN_4K 0x1000
|
||||
#define USERSPACE_DRIVER_NAME "user"
|
||||
#define KERNEL_DRIVER_NAME "kernel"
|
||||
#define CHAN_PER_DEVICE(total_wq_size) ((total_wq_size >= 128) ? 8 : 4)
|
||||
/*
|
||||
* Need to limit how many completions we reap in one poller to avoid starving
|
||||
* other threads as callers can submit new operations on the polling thread.
|
||||
@ -82,6 +81,15 @@ struct device_config g_dev_cfg1 = {
|
||||
.total_engines = 4,
|
||||
};
|
||||
|
||||
/*
 * Submit a single descriptor to the device, rotating the portal offset on
 * each submission so consecutive writes do not all land on the same portal
 * address (the stated goal of this change: better submission performance).
 */
static inline void
|
||||
_submit_to_hw(struct spdk_idxd_io_channel *chan, struct idxd_hw_desc *desc)
|
||||
{
|
||||
/* Write the descriptor to the portal at this channel's current offset. */
movdir64b(chan->portal + chan->portal_offset, desc);
|
||||
/*
 * Advance by chan_per_device strides so channels sharing the device keep
 * distinct offsets (each channel starts at num_channels * PORTAL_STRIDE);
 * PORTAL_MASK (PORTAL_SIZE - 1) wraps the offset within one portal page.
 */
chan->portal_offset = (chan->portal_offset + chan->idxd->chan_per_device * PORTAL_STRIDE) &
|
||||
PORTAL_MASK;
|
||||
|
||||
}
|
||||
|
||||
struct spdk_idxd_io_channel *
|
||||
spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
{
|
||||
@ -103,13 +111,17 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&idxd->num_channels_lock);
|
||||
if (idxd->num_channels == CHAN_PER_DEVICE(idxd->total_wq_size)) {
|
||||
if (idxd->num_channels == idxd->chan_per_device) {
|
||||
/* too many channels sharing this device */
|
||||
pthread_mutex_unlock(&idxd->num_channels_lock);
|
||||
free(chan->batch_base);
|
||||
free(chan);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Have each channel start at a different offset. */
|
||||
chan->portal_offset = (idxd->num_channels * PORTAL_STRIDE) & PORTAL_MASK;
|
||||
|
||||
idxd->num_channels++;
|
||||
pthread_mutex_unlock(&idxd->num_channels_lock);
|
||||
|
||||
@ -153,7 +165,7 @@ spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
|
||||
int
|
||||
spdk_idxd_chan_get_max_operations(struct spdk_idxd_io_channel *chan)
|
||||
{
|
||||
return chan->idxd->total_wq_size / CHAN_PER_DEVICE(chan->idxd->total_wq_size);
|
||||
return chan->idxd->total_wq_size / chan->idxd->chan_per_device;
|
||||
}
|
||||
|
||||
int
|
||||
@ -168,8 +180,7 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
|
||||
chan->idxd->wq_id = 0;
|
||||
}
|
||||
|
||||
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / CHAN_PER_DEVICE(
|
||||
chan->idxd->total_wq_size);
|
||||
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->chan_per_device;
|
||||
|
||||
chan->ring_slots = spdk_bit_array_create(num_ring_slots);
|
||||
if (chan->ring_slots == NULL) {
|
||||
@ -419,7 +430,7 @@ spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, void *dst, const void *
|
||||
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -469,7 +480,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
|
||||
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -506,7 +517,7 @@ spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan, void *src1, const vo
|
||||
desc->xfer_size = nbytes;
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -539,7 +550,7 @@ spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, void *dst, uint64_t fil
|
||||
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -575,7 +586,7 @@ spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan, uint32_t *crc_dst, vo
|
||||
comp->crc_dst = crc_dst;
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -616,7 +627,7 @@ spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan, void *dst, void
|
||||
comp->crc_dst = crc_dst;
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -742,7 +753,7 @@ spdk_idxd_batch_submit(struct spdk_idxd_io_channel *chan, struct idxd_batch *bat
|
||||
batch->remaining++;
|
||||
|
||||
/* Submit operation. */
|
||||
movdir64b(chan->portal, desc);
|
||||
_submit_to_hw(chan, desc);
|
||||
SPDK_DEBUGLOG(idxd, "Submitted batch %p\n", batch);
|
||||
|
||||
return 0;
|
||||
|
@ -102,6 +102,7 @@ struct spdk_idxd_io_channel {
|
||||
struct spdk_idxd_device *idxd;
|
||||
/* The portal is the address that we write descriptors to for submission. */
|
||||
void *portal;
|
||||
uint32_t portal_offset;
|
||||
uint16_t ring_size;
|
||||
|
||||
/*
|
||||
@ -191,6 +192,7 @@ struct spdk_idxd_device {
|
||||
int wq_id;
|
||||
uint32_t num_channels;
|
||||
uint32_t total_wq_size;
|
||||
uint32_t chan_per_device;
|
||||
pthread_mutex_t num_channels_lock;
|
||||
|
||||
struct idxd_group *groups;
|
||||
|
@ -210,6 +210,10 @@ config_wqs(struct spdk_kernel_idxd_device *kernel_idxd,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Spread the channels we allow per device based on the total number of WQE to try
|
||||
* and achieve optimal performance for common cases.
|
||||
*/
|
||||
kernel_idxd->idxd.chan_per_device = (kernel_idxd->idxd.total_wq_size >= 128) ? 8 : 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,10 @@ extern "C" {
|
||||
|
||||
#define IDXD_MMIO_BAR 0
|
||||
#define IDXD_WQ_BAR 2
|
||||
#define PORTAL_SIZE (4096 * 4)
|
||||
#define PORTAL_SIZE 0x1000
|
||||
#define WQ_TOTAL_PORTAL_SIZE (PORTAL_SIZE * 4)
|
||||
#define PORTAL_STRIDE 0x40
|
||||
#define PORTAL_MASK (PORTAL_SIZE - 1)
|
||||
|
||||
#define CFG_ENGINE_OFFSET 0x20
|
||||
#define CFG_FLAG_OFFSET 0x28
|
||||
|
@ -274,6 +274,10 @@ idxd_wq_config(struct spdk_user_idxd_device *user_idxd)
|
||||
assert(LOG2_WQ_MAX_XFER <= user_idxd->registers.gencap.max_xfer_shift);
|
||||
|
||||
idxd->total_wq_size = user_idxd->registers.wqcap.total_wq_size;
|
||||
/* Spread the channels we allow per device based on the total number of WQE to try
|
||||
* and achieve optimal performance for common cases.
|
||||
*/
|
||||
idxd->chan_per_device = (idxd->total_wq_size >= 128) ? 8 : 4;
|
||||
idxd->queues = calloc(1, user_idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq));
|
||||
if (idxd->queues == NULL) {
|
||||
SPDK_ERRLOG("Failed to allocate queue memory\n");
|
||||
@ -519,7 +523,7 @@ user_idxd_dump_sw_err(struct spdk_idxd_device *idxd, void *portal)
|
||||
static char *
|
||||
user_idxd_portal_get_addr(struct spdk_idxd_device *idxd)
|
||||
{
|
||||
return (char *)idxd->portals + idxd->wq_id * PORTAL_SIZE;
|
||||
return (char *)idxd->portals + idxd->wq_id * WQ_TOTAL_PORTAL_SIZE;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
Loading…
Reference in New Issue
Block a user