idxd: simplification in re-balancing channels
For flow control reasons we have to resize the bit arrays we use to manage flow as channels come and go. However since channels are assigned to devices, until the channel count reaches the device count there's no sharing so no resize of the array is needed. So, when we use a device for the first time there's no need to run through the rest of the channels and re-balance. Same thing is done on destruction. The code to free idxd specific resources was moved from the rebalance function to the idxd put channel function which is a much more logical place for it as well. Signed-off-by: paul luse <paul.e.luse@intel.com> Change-Id: Ib4df163286906f413dd6429dc6833af7b68e208c Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5846 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
parent
15c0e78042
commit
8a1a845053
15
CHANGELOG.md
15
CHANGELOG.md
@ -10,6 +10,21 @@ suitable polling group.
|
||||
|
||||
## v21.01:
|
||||
|
||||
### idxd
|
||||
|
||||
A new API `spdk_idxd_get_rebalance` was added so that users of the library
|
||||
can know whether they need to rebalance the flow control for the channel
|
||||
that was just added/removed. This is based on how the low level library
|
||||
shares devices amongst channels.
|
||||
|
||||
The API `spdk_idxd_reconfigure_chan` had the `num_channels` removed as this
|
||||
is now tracked in the library. The app makes use the new API above to
|
||||
determine whether to rebalance or not. This applies to `spdk_idxd_configure_chan`
|
||||
as well.
|
||||
|
||||
The API `spdk_idxd_put_channel` now returns the rebalance state for the
|
||||
underlying device.
|
||||
|
||||
### bdev
|
||||
|
||||
An `opts_size` element was added in the `spdk_bdev_opts` structure to solve the
|
||||
|
@ -64,7 +64,7 @@ struct idxd_batch;
|
||||
/**
|
||||
* Signature for configuring a channel
|
||||
*
|
||||
* \param chan IDXD channel to be configured.
|
||||
* \param chan IDXD channel to be configured
|
||||
* \return 0 on success, negative errno on failure.
|
||||
*/
|
||||
int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
|
||||
@ -73,10 +73,9 @@ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
|
||||
* Reconfigures this channel based on how many current channels there are.
|
||||
*
|
||||
* \param chan IDXD channel to be set.
|
||||
* \param num_channels total number of channels in use.
|
||||
* \return 0 on success, negative errno on failure.
|
||||
*/
|
||||
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels);
|
||||
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan);
|
||||
|
||||
/**
|
||||
* Signature for callback function invoked when a request is completed.
|
||||
@ -408,8 +407,17 @@ struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd
|
||||
* Free an IDXD channel.
|
||||
*
|
||||
* \param chan IDXD channel to free.
|
||||
* \return true if the underlying device needs a rebalance
|
||||
*/
|
||||
void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
|
||||
bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
|
||||
|
||||
/**
|
||||
* Determine if the idxd device needs rebalancing.
|
||||
*
|
||||
* \param idxd IDXD device.
|
||||
* \return true if rebalance is needed, false if not.
|
||||
*/
|
||||
bool spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -34,7 +34,7 @@
|
||||
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
|
||||
|
||||
SO_VER := 3
|
||||
SO_VER := 4
|
||||
SO_MINOR := 0
|
||||
|
||||
C_SRCS = idxd.c
|
||||
|
@ -100,6 +100,12 @@ _idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value)
|
||||
spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value);
|
||||
}
|
||||
|
||||
bool
|
||||
spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd)
|
||||
{
|
||||
return idxd->needs_rebalance;
|
||||
}
|
||||
|
||||
struct spdk_idxd_io_channel *
|
||||
spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
{
|
||||
@ -130,13 +136,44 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
batch++;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&chan->idxd->num_channels_lock);
|
||||
chan->idxd->num_channels++;
|
||||
if (chan->idxd->num_channels > 1) {
|
||||
chan->idxd->needs_rebalance = true;
|
||||
} else {
|
||||
chan->idxd->needs_rebalance = false;
|
||||
}
|
||||
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
|
||||
|
||||
return chan;
|
||||
}
|
||||
|
||||
void
|
||||
bool
|
||||
spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
|
||||
{
|
||||
struct idxd_batch *batch;
|
||||
bool rebalance = false;
|
||||
|
||||
pthread_mutex_lock(&chan->idxd->num_channels_lock);
|
||||
assert(chan->idxd->num_channels > 0);
|
||||
chan->idxd->num_channels--;
|
||||
if (chan->idxd->num_channels > 0) {
|
||||
rebalance = true;
|
||||
}
|
||||
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
|
||||
|
||||
spdk_free(chan->completions);
|
||||
spdk_free(chan->desc);
|
||||
spdk_bit_array_free(&chan->ring_slots);
|
||||
while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
|
||||
TAILQ_REMOVE(&chan->batch_pool, batch, link);
|
||||
spdk_free(batch->user_completions);
|
||||
spdk_free(batch->user_desc);
|
||||
}
|
||||
free(chan->batch_base);
|
||||
free(chan);
|
||||
|
||||
return rebalance;
|
||||
}
|
||||
|
||||
int
|
||||
@ -151,7 +188,9 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
|
||||
chan->idxd->wq_id = 0;
|
||||
}
|
||||
|
||||
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size;
|
||||
pthread_mutex_lock(&chan->idxd->num_channels_lock);
|
||||
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->num_channels;
|
||||
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
|
||||
|
||||
chan->ring_slots = spdk_bit_array_create(num_ring_slots);
|
||||
if (chan->ring_slots == NULL) {
|
||||
@ -272,30 +311,26 @@ _idxd_drain(struct spdk_idxd_io_channel *chan)
|
||||
}
|
||||
|
||||
int
|
||||
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels)
|
||||
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan)
|
||||
{
|
||||
uint32_t num_ring_slots;
|
||||
int rc;
|
||||
struct idxd_batch *batch;
|
||||
|
||||
_idxd_drain(chan);
|
||||
|
||||
assert(spdk_bit_array_count_set(chan->ring_slots) == 0);
|
||||
|
||||
if (num_channels == 0) {
|
||||
spdk_free(chan->completions);
|
||||
spdk_free(chan->desc);
|
||||
spdk_bit_array_free(&chan->ring_slots);
|
||||
while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
|
||||
TAILQ_REMOVE(&chan->batch_pool, batch, link);
|
||||
spdk_free(batch->user_completions);
|
||||
spdk_free(batch->user_desc);
|
||||
}
|
||||
free(chan->batch_base);
|
||||
pthread_mutex_lock(&chan->idxd->num_channels_lock);
|
||||
assert(chan->idxd->num_channels > 0);
|
||||
num_ring_slots = chan->ring_size / chan->idxd->num_channels;
|
||||
/* If no change (ie this was a call from another thread doing its for_each_channel,
|
||||
* then we can just bail now.
|
||||
*/
|
||||
if (num_ring_slots == chan->max_ring_slots) {
|
||||
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
num_ring_slots = chan->ring_size / num_channels;
|
||||
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
|
||||
|
||||
/* re-allocate our descriptor ring for hw flow control. */
|
||||
rc = spdk_bit_array_resize(&chan->ring_slots, num_ring_slots);
|
||||
@ -637,6 +672,7 @@ idxd_attach(struct spdk_pci_device *device)
|
||||
}
|
||||
|
||||
idxd->device = device;
|
||||
pthread_mutex_init(&idxd->num_channels_lock, NULL);
|
||||
|
||||
/* Enable PCI busmaster. */
|
||||
spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
|
||||
|
@ -186,6 +186,9 @@ struct spdk_idxd_device {
|
||||
void *portals;
|
||||
int socket_id;
|
||||
int wq_id;
|
||||
uint32_t num_channels;
|
||||
bool needs_rebalance;
|
||||
pthread_mutex_t num_channels_lock;
|
||||
|
||||
struct idxd_registers registers;
|
||||
uint32_t ims_offset;
|
||||
|
@ -2,6 +2,7 @@
|
||||
global:
|
||||
|
||||
# public functions
|
||||
spdk_idxd_device_needs_rebalance;
|
||||
spdk_idxd_configure_chan;
|
||||
spdk_idxd_reconfigure_chan;
|
||||
spdk_idxd_probe;
|
||||
|
@ -66,7 +66,6 @@ static TAILQ_HEAD(, pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_dev
|
||||
|
||||
struct idxd_device {
|
||||
struct spdk_idxd_device *idxd;
|
||||
int num_channels;
|
||||
TAILQ_ENTRY(idxd_device) tailq;
|
||||
};
|
||||
static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices);
|
||||
@ -74,15 +73,12 @@ static struct idxd_device *g_next_dev = NULL;
|
||||
|
||||
struct idxd_io_channel {
|
||||
struct spdk_idxd_io_channel *chan;
|
||||
struct spdk_idxd_device *idxd;
|
||||
struct idxd_device *dev;
|
||||
enum channel_state state;
|
||||
struct spdk_poller *poller;
|
||||
TAILQ_HEAD(, spdk_accel_task) queued_tasks;
|
||||
};
|
||||
|
||||
pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
static struct spdk_io_channel *idxd_get_io_channel(void);
|
||||
|
||||
static struct idxd_device *
|
||||
@ -336,18 +332,23 @@ _config_max_desc(struct spdk_io_channel_iter *i)
|
||||
{
|
||||
struct idxd_io_channel *chan;
|
||||
struct spdk_io_channel *ch;
|
||||
struct spdk_idxd_device *idxd;
|
||||
int rc;
|
||||
|
||||
ch = spdk_io_channel_iter_get_channel(i);
|
||||
chan = spdk_io_channel_get_ctx(ch);
|
||||
idxd = spdk_io_channel_iter_get_ctx(i);
|
||||
|
||||
pthread_mutex_lock(&g_configuration_lock);
|
||||
rc = spdk_idxd_reconfigure_chan(chan->chan, chan->dev->num_channels);
|
||||
pthread_mutex_unlock(&g_configuration_lock);
|
||||
if (rc == 0) {
|
||||
chan->state = IDXD_CHANNEL_ACTIVE;
|
||||
} else {
|
||||
chan->state = IDXD_CHANNEL_ERROR;
|
||||
/* reconfigure channel only if this channel is on the same idxd
|
||||
* device that initiated the rebalance.
|
||||
*/
|
||||
if (chan->dev->idxd == idxd) {
|
||||
rc = spdk_idxd_reconfigure_chan(chan->chan);
|
||||
if (rc == 0) {
|
||||
chan->state = IDXD_CHANNEL_ACTIVE;
|
||||
} else {
|
||||
chan->state = IDXD_CHANNEL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
spdk_for_each_channel_continue(i, 0);
|
||||
@ -359,12 +360,18 @@ _pause_chan(struct spdk_io_channel_iter *i)
|
||||
{
|
||||
struct idxd_io_channel *chan;
|
||||
struct spdk_io_channel *ch;
|
||||
struct spdk_idxd_device *idxd;
|
||||
|
||||
ch = spdk_io_channel_iter_get_channel(i);
|
||||
chan = spdk_io_channel_get_ctx(ch);
|
||||
idxd = spdk_io_channel_iter_get_ctx(i);
|
||||
|
||||
/* start queueing up new requests. */
|
||||
chan->state = IDXD_CHANNEL_PAUSED;
|
||||
/* start queueing up new requests if this channel is on the same idxd
|
||||
* device that initiated the rebalance.
|
||||
*/
|
||||
if (chan->dev->idxd == idxd) {
|
||||
chan->state = IDXD_CHANNEL_PAUSED;
|
||||
}
|
||||
|
||||
spdk_for_each_channel_continue(i, 0);
|
||||
}
|
||||
@ -372,7 +379,11 @@ _pause_chan(struct spdk_io_channel_iter *i)
|
||||
static void
|
||||
_pause_chan_done(struct spdk_io_channel_iter *i, int status)
|
||||
{
|
||||
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL);
|
||||
struct spdk_idxd_device *idxd;
|
||||
|
||||
idxd = spdk_io_channel_iter_get_ctx(i);
|
||||
|
||||
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -401,28 +412,30 @@ idxd_create_cb(void *io_device, void *ctx_buf)
|
||||
* Configure the channel but leave paused until all others
|
||||
* are paused and re-configured based on the new number of
|
||||
* channels. This enables dynamic load balancing for HW
|
||||
* flow control.
|
||||
* flow control. The idxd device will tell us if rebalance is
|
||||
* needed based on how many channels are using it.
|
||||
*/
|
||||
pthread_mutex_lock(&g_configuration_lock);
|
||||
rc = spdk_idxd_configure_chan(chan->chan);
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Failed to configure new channel rc = %d\n", rc);
|
||||
chan->state = IDXD_CHANNEL_ERROR;
|
||||
spdk_poller_unregister(&chan->poller);
|
||||
pthread_mutex_unlock(&g_configuration_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (spdk_idxd_device_needs_rebalance(chan->dev->idxd) == false) {
|
||||
chan->state = IDXD_CHANNEL_ACTIVE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
chan->state = IDXD_CHANNEL_PAUSED;
|
||||
chan->dev->num_channels++;
|
||||
pthread_mutex_unlock(&g_configuration_lock);
|
||||
|
||||
/*
|
||||
* Pause all channels so that we can set proper flow control
|
||||
* per channel. When all are paused, we'll update the max
|
||||
* number of descriptors allowed per channel.
|
||||
*/
|
||||
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL,
|
||||
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd,
|
||||
_pause_chan_done);
|
||||
|
||||
return 0;
|
||||
@ -431,27 +444,31 @@ idxd_create_cb(void *io_device, void *ctx_buf)
|
||||
static void
|
||||
_pause_chan_destroy_done(struct spdk_io_channel_iter *i, int status)
|
||||
{
|
||||
/* Rebalance the rings with the smaller number of remaining channels. */
|
||||
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL);
|
||||
struct spdk_idxd_device *idxd;
|
||||
|
||||
idxd = spdk_io_channel_iter_get_ctx(i);
|
||||
|
||||
/* Rebalance the rings with the smaller number of remaining channels, but
|
||||
* pass the idxd device along so its only done on shared channels.
|
||||
*/
|
||||
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
idxd_destroy_cb(void *io_device, void *ctx_buf)
|
||||
{
|
||||
struct idxd_io_channel *chan = ctx_buf;
|
||||
|
||||
pthread_mutex_lock(&g_configuration_lock);
|
||||
assert(chan->dev->num_channels > 0);
|
||||
chan->dev->num_channels--;
|
||||
spdk_idxd_reconfigure_chan(chan->chan, 0);
|
||||
pthread_mutex_unlock(&g_configuration_lock);
|
||||
bool rebalance;
|
||||
|
||||
spdk_poller_unregister(&chan->poller);
|
||||
spdk_idxd_put_channel(chan->chan);
|
||||
rebalance = spdk_idxd_put_channel(chan->chan);
|
||||
|
||||
/* Pause each channel then rebalance the max number of ring slots. */
|
||||
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL,
|
||||
_pause_chan_destroy_done);
|
||||
/* Only rebalance if there are still other channels on this device */
|
||||
if (rebalance == true) {
|
||||
/* Pause each channel then rebalance the max number of ring slots. */
|
||||
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd,
|
||||
_pause_chan_destroy_done);
|
||||
}
|
||||
}
|
||||
|
||||
static struct spdk_io_channel *
|
||||
|
@ -254,6 +254,7 @@ static int
|
||||
test_spdk_idxd_reconfigure_chan(void)
|
||||
{
|
||||
struct spdk_idxd_io_channel chan = {};
|
||||
struct spdk_idxd_device idxd = {};
|
||||
int rc;
|
||||
uint32_t test_ring_size = 8;
|
||||
uint32_t num_channels = 2;
|
||||
@ -263,8 +264,10 @@ test_spdk_idxd_reconfigure_chan(void)
|
||||
chan.completions = spdk_zmalloc(test_ring_size * sizeof(struct idxd_hw_desc), 0, NULL,
|
||||
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
|
||||
SPDK_CU_ASSERT_FATAL(chan.completions != NULL);
|
||||
chan.idxd = &idxd;
|
||||
chan.idxd->num_channels = num_channels;
|
||||
|
||||
rc = spdk_idxd_reconfigure_chan(&chan, num_channels);
|
||||
rc = spdk_idxd_reconfigure_chan(&chan);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(chan.max_ring_slots == test_ring_size / num_channels);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user