idxd: simplification in re-balancing channels

For flow control reasons we have to resize the bit arrays we
use to manage flow as channels come and go.  However since
channels are assigned to devices, until the channel count
reaches the device count there's no sharing so no resize of
the array is needed. So, when we use a device for the first
time there's no need to run through the rest of the channels
and re-balance.

Same thing is done on destruction. The code to free idxd
specific resources was moved from the rebalance function to
the idxd put channel function, which is a much more logical
place for it as well.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: Ib4df163286906f413dd6429dc6833af7b68e208c
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5846
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
paul luse 2021-01-09 16:55:16 -05:00 committed by Tomasz Zawadzki
parent 15c0e78042
commit 8a1a845053
8 changed files with 137 additions and 54 deletions

View File

@ -10,6 +10,21 @@ suitable polling group.
## v21.01:
### idxd
A new API `spdk_idxd_get_rebalance` was added so that users of the library
can know whether they need to rebalance the flow control for the channel
that was just added/removed. This is based on how the low level library
shares devices amongst channels.
The API `spdk_idxd_reconfigure_chan` had the `num_channels` removed as this
is now tracked in the library. The app makes use of the new API above to
determine whether to rebalance or not. This applies to `spdk_idxd_configure_chan`
as well.
The API `spdk_idxd_put_channel` now returns the rebalance state for the
underlying device.
### bdev
An `opts_size` element was added in the `spdk_bdev_opts` structure to solve the

View File

@ -64,7 +64,7 @@ struct idxd_batch;
/**
* Signature for configuring a channel
*
* \param chan IDXD channel to be configured.
* \param chan IDXD channel to be configured
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
@ -73,10 +73,9 @@ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
* Reconfigures this channel based on how many current channels there are.
*
* \param chan IDXD channel to be set.
* \param num_channels total number of channels in use.
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels);
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan);
/**
* Signature for callback function invoked when a request is completed.
@ -408,8 +407,17 @@ struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd
* Free an IDXD channel.
*
* \param chan IDXD channel to free.
* \return true if the underlying device needs a rebalance
*/
void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
/**
* Determine if the idxd device needs rebalancing.
*
* \param idxd IDXD device.
* \return true if rebalance is needed, false if not.
*/
bool spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd);
#ifdef __cplusplus
}

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 3
SO_VER := 4
SO_MINOR := 0
C_SRCS = idxd.c

View File

@ -100,6 +100,12 @@ _idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value)
spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value);
}
bool
spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd)
{
return idxd->needs_rebalance;
}
struct spdk_idxd_io_channel *
spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
{
@ -130,13 +136,44 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
batch++;
}
pthread_mutex_lock(&chan->idxd->num_channels_lock);
chan->idxd->num_channels++;
if (chan->idxd->num_channels > 1) {
chan->idxd->needs_rebalance = true;
} else {
chan->idxd->needs_rebalance = false;
}
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
return chan;
}
void
bool
spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
{
struct idxd_batch *batch;
bool rebalance = false;
pthread_mutex_lock(&chan->idxd->num_channels_lock);
assert(chan->idxd->num_channels > 0);
chan->idxd->num_channels--;
if (chan->idxd->num_channels > 0) {
rebalance = true;
}
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
spdk_free(chan->completions);
spdk_free(chan->desc);
spdk_bit_array_free(&chan->ring_slots);
while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
TAILQ_REMOVE(&chan->batch_pool, batch, link);
spdk_free(batch->user_completions);
spdk_free(batch->user_desc);
}
free(chan->batch_base);
free(chan);
return rebalance;
}
int
@ -151,7 +188,9 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
chan->idxd->wq_id = 0;
}
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size;
pthread_mutex_lock(&chan->idxd->num_channels_lock);
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->num_channels;
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
chan->ring_slots = spdk_bit_array_create(num_ring_slots);
if (chan->ring_slots == NULL) {
@ -272,30 +311,26 @@ _idxd_drain(struct spdk_idxd_io_channel *chan)
}
int
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels)
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan)
{
uint32_t num_ring_slots;
int rc;
struct idxd_batch *batch;
_idxd_drain(chan);
assert(spdk_bit_array_count_set(chan->ring_slots) == 0);
if (num_channels == 0) {
spdk_free(chan->completions);
spdk_free(chan->desc);
spdk_bit_array_free(&chan->ring_slots);
while ((batch = TAILQ_FIRST(&chan->batch_pool))) {
TAILQ_REMOVE(&chan->batch_pool, batch, link);
spdk_free(batch->user_completions);
spdk_free(batch->user_desc);
}
free(chan->batch_base);
pthread_mutex_lock(&chan->idxd->num_channels_lock);
assert(chan->idxd->num_channels > 0);
num_ring_slots = chan->ring_size / chan->idxd->num_channels;
/* If no change (ie this was a call from another thread doing its for_each_channel,
* then we can just bail now.
*/
if (num_ring_slots == chan->max_ring_slots) {
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
return 0;
}
num_ring_slots = chan->ring_size / num_channels;
pthread_mutex_unlock(&chan->idxd->num_channels_lock);
/* re-allocate our descriptor ring for hw flow control. */
rc = spdk_bit_array_resize(&chan->ring_slots, num_ring_slots);
@ -637,6 +672,7 @@ idxd_attach(struct spdk_pci_device *device)
}
idxd->device = device;
pthread_mutex_init(&idxd->num_channels_lock, NULL);
/* Enable PCI busmaster. */
spdk_pci_device_cfg_read32(device, &cmd_reg, 4);

View File

@ -186,6 +186,9 @@ struct spdk_idxd_device {
void *portals;
int socket_id;
int wq_id;
uint32_t num_channels;
bool needs_rebalance;
pthread_mutex_t num_channels_lock;
struct idxd_registers registers;
uint32_t ims_offset;

View File

@ -2,6 +2,7 @@
global:
# public functions
spdk_idxd_device_needs_rebalance;
spdk_idxd_configure_chan;
spdk_idxd_reconfigure_chan;
spdk_idxd_probe;

View File

@ -66,7 +66,6 @@ static TAILQ_HEAD(, pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_dev
struct idxd_device {
struct spdk_idxd_device *idxd;
int num_channels;
TAILQ_ENTRY(idxd_device) tailq;
};
static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices);
@ -74,15 +73,12 @@ static struct idxd_device *g_next_dev = NULL;
struct idxd_io_channel {
struct spdk_idxd_io_channel *chan;
struct spdk_idxd_device *idxd;
struct idxd_device *dev;
enum channel_state state;
struct spdk_poller *poller;
TAILQ_HEAD(, spdk_accel_task) queued_tasks;
};
pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER;
static struct spdk_io_channel *idxd_get_io_channel(void);
static struct idxd_device *
@ -336,18 +332,23 @@ _config_max_desc(struct spdk_io_channel_iter *i)
{
struct idxd_io_channel *chan;
struct spdk_io_channel *ch;
struct spdk_idxd_device *idxd;
int rc;
ch = spdk_io_channel_iter_get_channel(i);
chan = spdk_io_channel_get_ctx(ch);
idxd = spdk_io_channel_iter_get_ctx(i);
pthread_mutex_lock(&g_configuration_lock);
rc = spdk_idxd_reconfigure_chan(chan->chan, chan->dev->num_channels);
pthread_mutex_unlock(&g_configuration_lock);
if (rc == 0) {
chan->state = IDXD_CHANNEL_ACTIVE;
} else {
chan->state = IDXD_CHANNEL_ERROR;
/* reconfigure channel only if this channel is on the same idxd
* device that initiated the rebalance.
*/
if (chan->dev->idxd == idxd) {
rc = spdk_idxd_reconfigure_chan(chan->chan);
if (rc == 0) {
chan->state = IDXD_CHANNEL_ACTIVE;
} else {
chan->state = IDXD_CHANNEL_ERROR;
}
}
spdk_for_each_channel_continue(i, 0);
@ -359,12 +360,18 @@ _pause_chan(struct spdk_io_channel_iter *i)
{
struct idxd_io_channel *chan;
struct spdk_io_channel *ch;
struct spdk_idxd_device *idxd;
ch = spdk_io_channel_iter_get_channel(i);
chan = spdk_io_channel_get_ctx(ch);
idxd = spdk_io_channel_iter_get_ctx(i);
/* start queueing up new requests. */
chan->state = IDXD_CHANNEL_PAUSED;
/* start queueing up new requests if this channel is on the same idxd
* device that initiated the rebalance.
*/
if (chan->dev->idxd == idxd) {
chan->state = IDXD_CHANNEL_PAUSED;
}
spdk_for_each_channel_continue(i, 0);
}
@ -372,7 +379,11 @@ _pause_chan(struct spdk_io_channel_iter *i)
static void
_pause_chan_done(struct spdk_io_channel_iter *i, int status)
{
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL);
struct spdk_idxd_device *idxd;
idxd = spdk_io_channel_iter_get_ctx(i);
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL);
}
static int
@ -401,28 +412,30 @@ idxd_create_cb(void *io_device, void *ctx_buf)
* Configure the channel but leave paused until all others
* are paused and re-configured based on the new number of
* channels. This enables dynamic load balancing for HW
* flow control.
* flow control. The idxd device will tell us if rebalance is
* needed based on how many channels are using it.
*/
pthread_mutex_lock(&g_configuration_lock);
rc = spdk_idxd_configure_chan(chan->chan);
if (rc) {
SPDK_ERRLOG("Failed to configure new channel rc = %d\n", rc);
chan->state = IDXD_CHANNEL_ERROR;
spdk_poller_unregister(&chan->poller);
pthread_mutex_unlock(&g_configuration_lock);
return rc;
}
if (spdk_idxd_device_needs_rebalance(chan->dev->idxd) == false) {
chan->state = IDXD_CHANNEL_ACTIVE;
return 0;
}
chan->state = IDXD_CHANNEL_PAUSED;
chan->dev->num_channels++;
pthread_mutex_unlock(&g_configuration_lock);
/*
* Pause all channels so that we can set proper flow control
* per channel. When all are paused, we'll update the max
* number of descriptors allowed per channel.
*/
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL,
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd,
_pause_chan_done);
return 0;
@ -431,27 +444,31 @@ idxd_create_cb(void *io_device, void *ctx_buf)
static void
_pause_chan_destroy_done(struct spdk_io_channel_iter *i, int status)
{
/* Rebalance the rings with the smaller number of remaining channels. */
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL);
struct spdk_idxd_device *idxd;
idxd = spdk_io_channel_iter_get_ctx(i);
/* Rebalance the rings with the smaller number of remaining channels, but
* pass the idxd device along so its only done on shared channels.
*/
spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL);
}
static void
idxd_destroy_cb(void *io_device, void *ctx_buf)
{
struct idxd_io_channel *chan = ctx_buf;
pthread_mutex_lock(&g_configuration_lock);
assert(chan->dev->num_channels > 0);
chan->dev->num_channels--;
spdk_idxd_reconfigure_chan(chan->chan, 0);
pthread_mutex_unlock(&g_configuration_lock);
bool rebalance;
spdk_poller_unregister(&chan->poller);
spdk_idxd_put_channel(chan->chan);
rebalance = spdk_idxd_put_channel(chan->chan);
/* Pause each channel then rebalance the max number of ring slots. */
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL,
_pause_chan_destroy_done);
/* Only rebalance if there are still other channels on this device */
if (rebalance == true) {
/* Pause each channel then rebalance the max number of ring slots. */
spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd,
_pause_chan_destroy_done);
}
}
static struct spdk_io_channel *

View File

@ -254,6 +254,7 @@ static int
test_spdk_idxd_reconfigure_chan(void)
{
struct spdk_idxd_io_channel chan = {};
struct spdk_idxd_device idxd = {};
int rc;
uint32_t test_ring_size = 8;
uint32_t num_channels = 2;
@ -263,8 +264,10 @@ test_spdk_idxd_reconfigure_chan(void)
chan.completions = spdk_zmalloc(test_ring_size * sizeof(struct idxd_hw_desc), 0, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
SPDK_CU_ASSERT_FATAL(chan.completions != NULL);
chan.idxd = &idxd;
chan.idxd->num_channels = num_channels;
rc = spdk_idxd_reconfigure_chan(&chan, num_channels);
rc = spdk_idxd_reconfigure_chan(&chan);
CU_ASSERT(rc == 0);
CU_ASSERT(chan.max_ring_slots == test_ring_size / num_channels);