lib/idxd: rotate portal offset with each submission

Improves performance by not hitting the same portal address with every
submission. Each submission now advances the channel's portal offset by
a 64-byte stride, wrapping within the 4KB portal, and each channel
starts at a different offset so channels sharing a device are staggered
as well.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: I1ec8eae6f3acec9e98161029cd5406ec08603aa6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8190
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Author: paul luse, 2021-06-05 10:18:11 -04:00 (committed by Jim Harris)
parent 320ab72fb5
commit b4f302e525
5 changed files with 38 additions and 14 deletions
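
The heart of the change is the rotation done by the new _submit_to_hw() helper before each MOVDIR64B write. Below is a minimal, standalone sketch of just the offset arithmetic (no hardware submission; values such as chan_per_device = 8 and channel_index = 2 are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define PORTAL_SIZE   0x1000            /* one 4 KB submission portal page */
#define PORTAL_STRIDE 0x40              /* one 64-byte descriptor slot */
#define PORTAL_MASK   (PORTAL_SIZE - 1) /* wrap within the portal page */

int main(void)
{
	uint32_t chan_per_device = 8;   /* channels allowed to share this device */
	uint32_t channel_index = 2;     /* hypothetical: this is the third channel */

	/* Each channel starts at a different offset, staggered by its index. */
	uint32_t portal_offset = (channel_index * PORTAL_STRIDE) & PORTAL_MASK;

	/* Every submission advances by chan_per_device descriptor slots, so
	 * channels sharing the device keep landing on distinct addresses. */
	for (int i = 0; i < 4; i++) {
		printf("submission %d -> portal + 0x%03x\n", i, portal_offset);
		portal_offset = (portal_offset + chan_per_device * PORTAL_STRIDE) & PORTAL_MASK;
	}
	return 0;
}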


@@ -47,7 +47,6 @@
#define ALIGN_4K 0x1000
#define USERSPACE_DRIVER_NAME "user"
#define KERNEL_DRIVER_NAME "kernel"
-#define CHAN_PER_DEVICE(total_wq_size) ((total_wq_size >= 128) ? 8 : 4)
/*
* Need to limit how many completions we reap in one poller to avoid starving
* other threads as callers can submit new operations on the polling thread.
@@ -82,6 +81,15 @@ struct device_config g_dev_cfg1 = {
.total_engines = 4,
};
+static inline void
+_submit_to_hw(struct spdk_idxd_io_channel *chan, struct idxd_hw_desc *desc)
+{
+movdir64b(chan->portal + chan->portal_offset, desc);
+chan->portal_offset = (chan->portal_offset + chan->idxd->chan_per_device * PORTAL_STRIDE) &
+PORTAL_MASK;
+}
struct spdk_idxd_io_channel *
spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
{
@@ -103,13 +111,17 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
}
pthread_mutex_lock(&idxd->num_channels_lock);
-if (idxd->num_channels == CHAN_PER_DEVICE(idxd->total_wq_size)) {
+if (idxd->num_channels == idxd->chan_per_device) {
/* too many channels sharing this device */
pthread_mutex_unlock(&idxd->num_channels_lock);
free(chan->batch_base);
free(chan);
return NULL;
}
+/* Have each channel start at a different offset. */
+chan->portal_offset = (idxd->num_channels * PORTAL_STRIDE) & PORTAL_MASK;
idxd->num_channels++;
pthread_mutex_unlock(&idxd->num_channels_lock);
@@ -153,7 +165,7 @@ spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
int
spdk_idxd_chan_get_max_operations(struct spdk_idxd_io_channel *chan)
{
-return chan->idxd->total_wq_size / CHAN_PER_DEVICE(chan->idxd->total_wq_size);
+return chan->idxd->total_wq_size / chan->idxd->chan_per_device;
}
int
@@ -168,8 +180,7 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
chan->idxd->wq_id = 0;
}
-num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / CHAN_PER_DEVICE(
-chan->idxd->total_wq_size);
+num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->chan_per_device;
chan->ring_slots = spdk_bit_array_create(num_ring_slots);
if (chan->ring_slots == NULL) {
@@ -419,7 +430,7 @@ spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, void *dst, const void *
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -469,7 +480,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -506,7 +517,7 @@ spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan, void *src1, const vo
desc->xfer_size = nbytes;
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -539,7 +550,7 @@ spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, void *dst, uint64_t fil
desc->flags |= IDXD_FLAG_CACHE_CONTROL; /* direct IO to CPU cache instead of mem */
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -575,7 +586,7 @@ spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan, uint32_t *crc_dst, vo
comp->crc_dst = crc_dst;
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -616,7 +627,7 @@ spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan, void *dst, void
comp->crc_dst = crc_dst;
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
return 0;
}
@@ -742,7 +753,7 @@ spdk_idxd_batch_submit(struct spdk_idxd_io_channel *chan, struct idxd_batch *bat
batch->remaining++;
/* Submit operation. */
-movdir64b(chan->portal, desc);
+_submit_to_hw(chan, desc);
SPDK_DEBUGLOG(idxd, "Submitted batch %p\n", batch);
return 0;


@@ -102,6 +102,7 @@ struct spdk_idxd_io_channel {
struct spdk_idxd_device *idxd;
/* The portal is the address that we write descriptors to for submission. */
void *portal;
+uint32_t portal_offset;
uint16_t ring_size;
/*
@@ -191,6 +192,7 @@ struct spdk_idxd_device {
int wq_id;
uint32_t num_channels;
uint32_t total_wq_size;
+uint32_t chan_per_device;
pthread_mutex_t num_channels_lock;
struct idxd_group *groups;


@@ -210,6 +210,10 @@ config_wqs(struct spdk_kernel_idxd_device *kernel_idxd,
return -1;
}
+/* Spread the channels we allow per device based on the total number of WQE to try
+ * and achieve optimal performance for common cases.
+ */
+kernel_idxd->idxd.chan_per_device = (kernel_idxd->idxd.total_wq_size >= 128) ? 8 : 4;
return 0;
}
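
The same heuristic is applied in the user-space path further down, and it determines each channel's share of the work queue (see spdk_idxd_chan_get_max_operations() above). A small sketch of what the split works out to, using made-up total WQ sizes:

#include <stdint.h>
#include <stdio.h>

/* Same heuristic as the patch: devices with a large total WQ size get 8
 * channels, smaller ones get 4. */
static uint32_t chan_per_device(uint32_t total_wq_size)
{
	return (total_wq_size >= 128) ? 8 : 4;
}

int main(void)
{
	uint32_t sizes[] = { 64, 128, 256 };  /* hypothetical total WQ sizes */

	for (int i = 0; i < 3; i++) {
		uint32_t chans = chan_per_device(sizes[i]);
		/* Per-channel share, as spdk_idxd_chan_get_max_operations() computes it. */
		printf("total_wq_size %3u -> %u channels, %u slots each\n",
		       sizes[i], chans, sizes[i] / chans);
	}
	return 0;
}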


@@ -48,7 +48,10 @@ extern "C" {
#define IDXD_MMIO_BAR 0
#define IDXD_WQ_BAR 2
-#define PORTAL_SIZE (4096 * 4)
+#define PORTAL_SIZE 0x1000
+#define WQ_TOTAL_PORTAL_SIZE (PORTAL_SIZE * 4)
+#define PORTAL_STRIDE 0x40
+#define PORTAL_MASK (PORTAL_SIZE - 1)
#define CFG_ENGINE_OFFSET 0x20
#define CFG_FLAG_OFFSET 0x28
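
The old PORTAL_SIZE covered the whole 16 KB per-WQ portal region; after this change PORTAL_SIZE names a single 4 KB page and WQ_TOTAL_PORTAL_SIZE names the full region, which is what user_idxd_portal_get_addr() below now uses. A minimal sketch of how the constants combine into a submission address, with a made-up mapping base and WQ id:

#include <stdint.h>
#include <stdio.h>

#define PORTAL_SIZE          0x1000             /* one 4 KB portal page */
#define WQ_TOTAL_PORTAL_SIZE (PORTAL_SIZE * 4)  /* whole per-WQ portal region, still 16 KB */
#define PORTAL_STRIDE        0x40
#define PORTAL_MASK          (PORTAL_SIZE - 1)

int main(void)
{
	uintptr_t portals = 0x100000;          /* hypothetical mapped WQ BAR base */
	int wq_id = 1;                         /* hypothetical work queue index */
	uint32_t portal_offset = 3 * PORTAL_STRIDE;

	/* Per-WQ base, as user_idxd_portal_get_addr() now computes it. */
	uintptr_t wq_portal = portals + (uintptr_t)wq_id * WQ_TOTAL_PORTAL_SIZE;

	/* Final 64-byte descriptor write address: WQ base plus the rotating offset. */
	printf("descriptor lands at 0x%lx\n",
	       (unsigned long)(wq_portal + (portal_offset & PORTAL_MASK)));
	return 0;
}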


@@ -274,6 +274,10 @@ idxd_wq_config(struct spdk_user_idxd_device *user_idxd)
assert(LOG2_WQ_MAX_XFER <= user_idxd->registers.gencap.max_xfer_shift);
idxd->total_wq_size = user_idxd->registers.wqcap.total_wq_size;
+/* Spread the channels we allow per device based on the total number of WQE to try
+ * and achieve optimal performance for common cases.
+ */
+idxd->chan_per_device = (idxd->total_wq_size >= 128) ? 8 : 4;
idxd->queues = calloc(1, user_idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq));
if (idxd->queues == NULL) {
SPDK_ERRLOG("Failed to allocate queue memory\n");
@@ -519,7 +523,7 @@ user_idxd_dump_sw_err(struct spdk_idxd_device *idxd, void *portal)
static char *
user_idxd_portal_get_addr(struct spdk_idxd_device *idxd)
{
-return (char *)idxd->portals + idxd->wq_id * PORTAL_SIZE;
+return (char *)idxd->portals + idxd->wq_id * WQ_TOTAL_PORTAL_SIZE;
}
static bool