diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ad266595..79a621c94b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,21 @@ suitable polling group. ## v21.01: +### idxd + +A new API `spdk_idxd_get_rebalance` was added so that users of the library +can know whether they need to rebalance the flow control for the channel +that was just added/removed. This is based on how the low level library +shares devices amongst channels. + +The API `spdk_idxd_reconfigure_chan` had the `num_channels` removed as this +is now tracked in the library. The app makes use the new API above to +determine whether to rebalance or not. This applies to `spdk_idxd_configure_chan` +as well. + +The API `spdk_idxd_put_channel` now returns the rebalance state for the +underlying device. + ### bdev An `opts_size` element was added in the `spdk_bdev_opts` structure to solve the diff --git a/include/spdk/idxd.h b/include/spdk/idxd.h index cb9ebe8b81..6c35a37569 100644 --- a/include/spdk/idxd.h +++ b/include/spdk/idxd.h @@ -64,7 +64,7 @@ struct idxd_batch; /** * Signature for configuring a channel * - * \param chan IDXD channel to be configured. + * \param chan IDXD channel to be configured * \return 0 on success, negative errno on failure. */ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); @@ -73,10 +73,9 @@ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); * Reconfigures this channel based on how many current channels there are. * * \param chan IDXD channel to be set. - * \param num_channels total number of channels in use. * \return 0 on success, negative errno on failure. */ -int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels); +int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan); /** * Signature for callback function invoked when a request is completed. @@ -408,8 +407,17 @@ struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd * Free an IDXD channel. * * \param chan IDXD channel to free. + * \return true if the underlying device needs a rebalance */ -void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); +bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); + +/** + * Determine if the idxd device needs rebalancing. + * + * \param idxd IDXD device. + * \return true if rebalance is needed, false if not. + */ +bool spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd); #ifdef __cplusplus } diff --git a/lib/idxd/Makefile b/lib/idxd/Makefile index 5d896649b5..08c19e83da 100644 --- a/lib/idxd/Makefile +++ b/lib/idxd/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 3 +SO_VER := 4 SO_MINOR := 0 C_SRCS = idxd.c diff --git a/lib/idxd/idxd.c b/lib/idxd/idxd.c index 92e0379186..ae304162b2 100644 --- a/lib/idxd/idxd.c +++ b/lib/idxd/idxd.c @@ -100,6 +100,12 @@ _idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value) spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value); } +bool +spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd) +{ + return idxd->needs_rebalance; +} + struct spdk_idxd_io_channel * spdk_idxd_get_channel(struct spdk_idxd_device *idxd) { @@ -130,13 +136,44 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd) batch++; } + pthread_mutex_lock(&chan->idxd->num_channels_lock); + chan->idxd->num_channels++; + if (chan->idxd->num_channels > 1) { + chan->idxd->needs_rebalance = true; + } else { + chan->idxd->needs_rebalance = false; + } + pthread_mutex_unlock(&chan->idxd->num_channels_lock); + return chan; } -void +bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan) { + struct idxd_batch *batch; + bool rebalance = false; + + pthread_mutex_lock(&chan->idxd->num_channels_lock); + assert(chan->idxd->num_channels > 0); + chan->idxd->num_channels--; + if (chan->idxd->num_channels > 0) { + rebalance = true; + } + pthread_mutex_unlock(&chan->idxd->num_channels_lock); + + spdk_free(chan->completions); + spdk_free(chan->desc); + spdk_bit_array_free(&chan->ring_slots); + while ((batch = TAILQ_FIRST(&chan->batch_pool))) { + TAILQ_REMOVE(&chan->batch_pool, batch, link); + spdk_free(batch->user_completions); + spdk_free(batch->user_desc); + } + free(chan->batch_base); free(chan); + + return rebalance; } int @@ -151,7 +188,9 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan) chan->idxd->wq_id = 0; } - num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size; + pthread_mutex_lock(&chan->idxd->num_channels_lock); + num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->num_channels; + pthread_mutex_unlock(&chan->idxd->num_channels_lock); chan->ring_slots = spdk_bit_array_create(num_ring_slots); if (chan->ring_slots == NULL) { @@ -272,30 +311,26 @@ _idxd_drain(struct spdk_idxd_io_channel *chan) } int -spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels) +spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan) { uint32_t num_ring_slots; int rc; - struct idxd_batch *batch; _idxd_drain(chan); assert(spdk_bit_array_count_set(chan->ring_slots) == 0); - if (num_channels == 0) { - spdk_free(chan->completions); - spdk_free(chan->desc); - spdk_bit_array_free(&chan->ring_slots); - while ((batch = TAILQ_FIRST(&chan->batch_pool))) { - TAILQ_REMOVE(&chan->batch_pool, batch, link); - spdk_free(batch->user_completions); - spdk_free(batch->user_desc); - } - free(chan->batch_base); + pthread_mutex_lock(&chan->idxd->num_channels_lock); + assert(chan->idxd->num_channels > 0); + num_ring_slots = chan->ring_size / chan->idxd->num_channels; + /* If no change (ie this was a call from another thread doing its for_each_channel, + * then we can just bail now. + */ + if (num_ring_slots == chan->max_ring_slots) { + pthread_mutex_unlock(&chan->idxd->num_channels_lock); return 0; } - - num_ring_slots = chan->ring_size / num_channels; + pthread_mutex_unlock(&chan->idxd->num_channels_lock); /* re-allocate our descriptor ring for hw flow control. */ rc = spdk_bit_array_resize(&chan->ring_slots, num_ring_slots); @@ -637,6 +672,7 @@ idxd_attach(struct spdk_pci_device *device) } idxd->device = device; + pthread_mutex_init(&idxd->num_channels_lock, NULL); /* Enable PCI busmaster. */ spdk_pci_device_cfg_read32(device, &cmd_reg, 4); diff --git a/lib/idxd/idxd.h b/lib/idxd/idxd.h index bea0f52fb5..085823bff6 100644 --- a/lib/idxd/idxd.h +++ b/lib/idxd/idxd.h @@ -186,6 +186,9 @@ struct spdk_idxd_device { void *portals; int socket_id; int wq_id; + uint32_t num_channels; + bool needs_rebalance; + pthread_mutex_t num_channels_lock; struct idxd_registers registers; uint32_t ims_offset; diff --git a/lib/idxd/spdk_idxd.map b/lib/idxd/spdk_idxd.map index 4bffdf209c..4f9b7c5558 100644 --- a/lib/idxd/spdk_idxd.map +++ b/lib/idxd/spdk_idxd.map @@ -2,6 +2,7 @@ global: # public functions + spdk_idxd_device_needs_rebalance; spdk_idxd_configure_chan; spdk_idxd_reconfigure_chan; spdk_idxd_probe; diff --git a/module/accel/idxd/accel_engine_idxd.c b/module/accel/idxd/accel_engine_idxd.c index f5d1da7f3e..440ff76976 100644 --- a/module/accel/idxd/accel_engine_idxd.c +++ b/module/accel/idxd/accel_engine_idxd.c @@ -66,7 +66,6 @@ static TAILQ_HEAD(, pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_dev struct idxd_device { struct spdk_idxd_device *idxd; - int num_channels; TAILQ_ENTRY(idxd_device) tailq; }; static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices); @@ -74,15 +73,12 @@ static struct idxd_device *g_next_dev = NULL; struct idxd_io_channel { struct spdk_idxd_io_channel *chan; - struct spdk_idxd_device *idxd; struct idxd_device *dev; enum channel_state state; struct spdk_poller *poller; TAILQ_HEAD(, spdk_accel_task) queued_tasks; }; -pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER; - static struct spdk_io_channel *idxd_get_io_channel(void); static struct idxd_device * @@ -336,18 +332,23 @@ _config_max_desc(struct spdk_io_channel_iter *i) { struct idxd_io_channel *chan; struct spdk_io_channel *ch; + struct spdk_idxd_device *idxd; int rc; ch = spdk_io_channel_iter_get_channel(i); chan = spdk_io_channel_get_ctx(ch); + idxd = spdk_io_channel_iter_get_ctx(i); - pthread_mutex_lock(&g_configuration_lock); - rc = spdk_idxd_reconfigure_chan(chan->chan, chan->dev->num_channels); - pthread_mutex_unlock(&g_configuration_lock); - if (rc == 0) { - chan->state = IDXD_CHANNEL_ACTIVE; - } else { - chan->state = IDXD_CHANNEL_ERROR; + /* reconfigure channel only if this channel is on the same idxd + * device that initiated the rebalance. + */ + if (chan->dev->idxd == idxd) { + rc = spdk_idxd_reconfigure_chan(chan->chan); + if (rc == 0) { + chan->state = IDXD_CHANNEL_ACTIVE; + } else { + chan->state = IDXD_CHANNEL_ERROR; + } } spdk_for_each_channel_continue(i, 0); @@ -359,12 +360,18 @@ _pause_chan(struct spdk_io_channel_iter *i) { struct idxd_io_channel *chan; struct spdk_io_channel *ch; + struct spdk_idxd_device *idxd; ch = spdk_io_channel_iter_get_channel(i); chan = spdk_io_channel_get_ctx(ch); + idxd = spdk_io_channel_iter_get_ctx(i); - /* start queueing up new requests. */ - chan->state = IDXD_CHANNEL_PAUSED; + /* start queueing up new requests if this channel is on the same idxd + * device that initiated the rebalance. + */ + if (chan->dev->idxd == idxd) { + chan->state = IDXD_CHANNEL_PAUSED; + } spdk_for_each_channel_continue(i, 0); } @@ -372,7 +379,11 @@ _pause_chan(struct spdk_io_channel_iter *i) static void _pause_chan_done(struct spdk_io_channel_iter *i, int status) { - spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL); + struct spdk_idxd_device *idxd; + + idxd = spdk_io_channel_iter_get_ctx(i); + + spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL); } static int @@ -401,28 +412,30 @@ idxd_create_cb(void *io_device, void *ctx_buf) * Configure the channel but leave paused until all others * are paused and re-configured based on the new number of * channels. This enables dynamic load balancing for HW - * flow control. + * flow control. The idxd device will tell us if rebalance is + * needed based on how many channels are using it. */ - pthread_mutex_lock(&g_configuration_lock); rc = spdk_idxd_configure_chan(chan->chan); if (rc) { SPDK_ERRLOG("Failed to configure new channel rc = %d\n", rc); chan->state = IDXD_CHANNEL_ERROR; spdk_poller_unregister(&chan->poller); - pthread_mutex_unlock(&g_configuration_lock); return rc; } + if (spdk_idxd_device_needs_rebalance(chan->dev->idxd) == false) { + chan->state = IDXD_CHANNEL_ACTIVE; + return 0; + } + chan->state = IDXD_CHANNEL_PAUSED; - chan->dev->num_channels++; - pthread_mutex_unlock(&g_configuration_lock); /* * Pause all channels so that we can set proper flow control * per channel. When all are paused, we'll update the max * number of descriptors allowed per channel. */ - spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL, + spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd, _pause_chan_done); return 0; @@ -431,27 +444,31 @@ idxd_create_cb(void *io_device, void *ctx_buf) static void _pause_chan_destroy_done(struct spdk_io_channel_iter *i, int status) { - /* Rebalance the rings with the smaller number of remaining channels. */ - spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL); + struct spdk_idxd_device *idxd; + + idxd = spdk_io_channel_iter_get_ctx(i); + + /* Rebalance the rings with the smaller number of remaining channels, but + * pass the idxd device along so its only done on shared channels. + */ + spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL); } static void idxd_destroy_cb(void *io_device, void *ctx_buf) { struct idxd_io_channel *chan = ctx_buf; - - pthread_mutex_lock(&g_configuration_lock); - assert(chan->dev->num_channels > 0); - chan->dev->num_channels--; - spdk_idxd_reconfigure_chan(chan->chan, 0); - pthread_mutex_unlock(&g_configuration_lock); + bool rebalance; spdk_poller_unregister(&chan->poller); - spdk_idxd_put_channel(chan->chan); + rebalance = spdk_idxd_put_channel(chan->chan); - /* Pause each channel then rebalance the max number of ring slots. */ - spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL, - _pause_chan_destroy_done); + /* Only rebalance if there are still other channels on this device */ + if (rebalance == true) { + /* Pause each channel then rebalance the max number of ring slots. */ + spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd, + _pause_chan_destroy_done); + } } static struct spdk_io_channel * diff --git a/test/unit/lib/idxd/idxd.c/idxd_ut.c b/test/unit/lib/idxd/idxd.c/idxd_ut.c index 661607bf70..c0617a237d 100644 --- a/test/unit/lib/idxd/idxd.c/idxd_ut.c +++ b/test/unit/lib/idxd/idxd.c/idxd_ut.c @@ -254,6 +254,7 @@ static int test_spdk_idxd_reconfigure_chan(void) { struct spdk_idxd_io_channel chan = {}; + struct spdk_idxd_device idxd = {}; int rc; uint32_t test_ring_size = 8; uint32_t num_channels = 2; @@ -263,8 +264,10 @@ test_spdk_idxd_reconfigure_chan(void) chan.completions = spdk_zmalloc(test_ring_size * sizeof(struct idxd_hw_desc), 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); SPDK_CU_ASSERT_FATAL(chan.completions != NULL); + chan.idxd = &idxd; + chan.idxd->num_channels = num_channels; - rc = spdk_idxd_reconfigure_chan(&chan, num_channels); + rc = spdk_idxd_reconfigure_chan(&chan); CU_ASSERT(rc == 0); CU_ASSERT(chan.max_ring_slots == test_ring_size / num_channels);