From 8a1a84505391c77dbf2e5510d41a7f3b8d9baa51 Mon Sep 17 00:00:00 2001 From: paul luse Date: Sat, 9 Jan 2021 16:55:16 -0500 Subject: [PATCH] idxd: simplification in re-balancing channels For flow control reasons we have to resize the bit arrays we use to manage flow as channels come and go. However since channels are assigned to devices, until the channel count reaches the device count there's no sharing so no resize of the array is needed. So, when we use a device for the first time there's no need to run through the rest of the channels and re-balance. Same thing is done on destruction. The code to free idxd specific resources was moved from the rebalance function to the idxd put channel function which is a much more logical place for it as well. Signed-off-by: paul luse Change-Id: Ib4df163286906f413dd6429dc6833af7b68e208c Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5846 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- CHANGELOG.md | 15 +++++ include/spdk/idxd.h | 16 ++++-- lib/idxd/Makefile | 2 +- lib/idxd/idxd.c | 68 ++++++++++++++++------ lib/idxd/idxd.h | 3 + lib/idxd/spdk_idxd.map | 1 + module/accel/idxd/accel_engine_idxd.c | 81 ++++++++++++++++----------- test/unit/lib/idxd/idxd.c/idxd_ut.c | 5 +- 8 files changed, 137 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ad266595..79a621c94b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,21 @@ suitable polling group. ## v21.01: +### idxd + +A new API `spdk_idxd_get_rebalance` was added so that users of the library +can know whether they need to rebalance the flow control for the channel +that was just added/removed. This is based on how the low level library +shares devices amongst channels. + +The API `spdk_idxd_reconfigure_chan` had the `num_channels` removed as this +is now tracked in the library. The app makes use the new API above to +determine whether to rebalance or not. This applies to `spdk_idxd_configure_chan` +as well. + +The API `spdk_idxd_put_channel` now returns the rebalance state for the +underlying device. + ### bdev An `opts_size` element was added in the `spdk_bdev_opts` structure to solve the diff --git a/include/spdk/idxd.h b/include/spdk/idxd.h index cb9ebe8b81..6c35a37569 100644 --- a/include/spdk/idxd.h +++ b/include/spdk/idxd.h @@ -64,7 +64,7 @@ struct idxd_batch; /** * Signature for configuring a channel * - * \param chan IDXD channel to be configured. + * \param chan IDXD channel to be configured * \return 0 on success, negative errno on failure. */ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); @@ -73,10 +73,9 @@ int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); * Reconfigures this channel based on how many current channels there are. * * \param chan IDXD channel to be set. - * \param num_channels total number of channels in use. * \return 0 on success, negative errno on failure. */ -int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels); +int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan); /** * Signature for callback function invoked when a request is completed. @@ -408,8 +407,17 @@ struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd * Free an IDXD channel. * * \param chan IDXD channel to free. + * \return true if the underlying device needs a rebalance */ -void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); +bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); + +/** + * Determine if the idxd device needs rebalancing. + * + * \param idxd IDXD device. + * \return true if rebalance is needed, false if not. + */ +bool spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd); #ifdef __cplusplus } diff --git a/lib/idxd/Makefile b/lib/idxd/Makefile index 5d896649b5..08c19e83da 100644 --- a/lib/idxd/Makefile +++ b/lib/idxd/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 3 +SO_VER := 4 SO_MINOR := 0 C_SRCS = idxd.c diff --git a/lib/idxd/idxd.c b/lib/idxd/idxd.c index 92e0379186..ae304162b2 100644 --- a/lib/idxd/idxd.c +++ b/lib/idxd/idxd.c @@ -100,6 +100,12 @@ _idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value) spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value); } +bool +spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd) +{ + return idxd->needs_rebalance; +} + struct spdk_idxd_io_channel * spdk_idxd_get_channel(struct spdk_idxd_device *idxd) { @@ -130,13 +136,44 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd) batch++; } + pthread_mutex_lock(&chan->idxd->num_channels_lock); + chan->idxd->num_channels++; + if (chan->idxd->num_channels > 1) { + chan->idxd->needs_rebalance = true; + } else { + chan->idxd->needs_rebalance = false; + } + pthread_mutex_unlock(&chan->idxd->num_channels_lock); + return chan; } -void +bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan) { + struct idxd_batch *batch; + bool rebalance = false; + + pthread_mutex_lock(&chan->idxd->num_channels_lock); + assert(chan->idxd->num_channels > 0); + chan->idxd->num_channels--; + if (chan->idxd->num_channels > 0) { + rebalance = true; + } + pthread_mutex_unlock(&chan->idxd->num_channels_lock); + + spdk_free(chan->completions); + spdk_free(chan->desc); + spdk_bit_array_free(&chan->ring_slots); + while ((batch = TAILQ_FIRST(&chan->batch_pool))) { + TAILQ_REMOVE(&chan->batch_pool, batch, link); + spdk_free(batch->user_completions); + spdk_free(batch->user_desc); + } + free(chan->batch_base); free(chan); + + return rebalance; } int @@ -151,7 +188,9 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan) chan->idxd->wq_id = 0; } - num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size; + pthread_mutex_lock(&chan->idxd->num_channels_lock); + num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->num_channels; + pthread_mutex_unlock(&chan->idxd->num_channels_lock); chan->ring_slots = spdk_bit_array_create(num_ring_slots); if (chan->ring_slots == NULL) { @@ -272,30 +311,26 @@ _idxd_drain(struct spdk_idxd_io_channel *chan) } int -spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels) +spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan) { uint32_t num_ring_slots; int rc; - struct idxd_batch *batch; _idxd_drain(chan); assert(spdk_bit_array_count_set(chan->ring_slots) == 0); - if (num_channels == 0) { - spdk_free(chan->completions); - spdk_free(chan->desc); - spdk_bit_array_free(&chan->ring_slots); - while ((batch = TAILQ_FIRST(&chan->batch_pool))) { - TAILQ_REMOVE(&chan->batch_pool, batch, link); - spdk_free(batch->user_completions); - spdk_free(batch->user_desc); - } - free(chan->batch_base); + pthread_mutex_lock(&chan->idxd->num_channels_lock); + assert(chan->idxd->num_channels > 0); + num_ring_slots = chan->ring_size / chan->idxd->num_channels; + /* If no change (ie this was a call from another thread doing its for_each_channel, + * then we can just bail now. + */ + if (num_ring_slots == chan->max_ring_slots) { + pthread_mutex_unlock(&chan->idxd->num_channels_lock); return 0; } - - num_ring_slots = chan->ring_size / num_channels; + pthread_mutex_unlock(&chan->idxd->num_channels_lock); /* re-allocate our descriptor ring for hw flow control. */ rc = spdk_bit_array_resize(&chan->ring_slots, num_ring_slots); @@ -637,6 +672,7 @@ idxd_attach(struct spdk_pci_device *device) } idxd->device = device; + pthread_mutex_init(&idxd->num_channels_lock, NULL); /* Enable PCI busmaster. */ spdk_pci_device_cfg_read32(device, &cmd_reg, 4); diff --git a/lib/idxd/idxd.h b/lib/idxd/idxd.h index bea0f52fb5..085823bff6 100644 --- a/lib/idxd/idxd.h +++ b/lib/idxd/idxd.h @@ -186,6 +186,9 @@ struct spdk_idxd_device { void *portals; int socket_id; int wq_id; + uint32_t num_channels; + bool needs_rebalance; + pthread_mutex_t num_channels_lock; struct idxd_registers registers; uint32_t ims_offset; diff --git a/lib/idxd/spdk_idxd.map b/lib/idxd/spdk_idxd.map index 4bffdf209c..4f9b7c5558 100644 --- a/lib/idxd/spdk_idxd.map +++ b/lib/idxd/spdk_idxd.map @@ -2,6 +2,7 @@ global: # public functions + spdk_idxd_device_needs_rebalance; spdk_idxd_configure_chan; spdk_idxd_reconfigure_chan; spdk_idxd_probe; diff --git a/module/accel/idxd/accel_engine_idxd.c b/module/accel/idxd/accel_engine_idxd.c index f5d1da7f3e..440ff76976 100644 --- a/module/accel/idxd/accel_engine_idxd.c +++ b/module/accel/idxd/accel_engine_idxd.c @@ -66,7 +66,6 @@ static TAILQ_HEAD(, pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_dev struct idxd_device { struct spdk_idxd_device *idxd; - int num_channels; TAILQ_ENTRY(idxd_device) tailq; }; static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices); @@ -74,15 +73,12 @@ static struct idxd_device *g_next_dev = NULL; struct idxd_io_channel { struct spdk_idxd_io_channel *chan; - struct spdk_idxd_device *idxd; struct idxd_device *dev; enum channel_state state; struct spdk_poller *poller; TAILQ_HEAD(, spdk_accel_task) queued_tasks; }; -pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER; - static struct spdk_io_channel *idxd_get_io_channel(void); static struct idxd_device * @@ -336,18 +332,23 @@ _config_max_desc(struct spdk_io_channel_iter *i) { struct idxd_io_channel *chan; struct spdk_io_channel *ch; + struct spdk_idxd_device *idxd; int rc; ch = spdk_io_channel_iter_get_channel(i); chan = spdk_io_channel_get_ctx(ch); + idxd = spdk_io_channel_iter_get_ctx(i); - pthread_mutex_lock(&g_configuration_lock); - rc = spdk_idxd_reconfigure_chan(chan->chan, chan->dev->num_channels); - pthread_mutex_unlock(&g_configuration_lock); - if (rc == 0) { - chan->state = IDXD_CHANNEL_ACTIVE; - } else { - chan->state = IDXD_CHANNEL_ERROR; + /* reconfigure channel only if this channel is on the same idxd + * device that initiated the rebalance. + */ + if (chan->dev->idxd == idxd) { + rc = spdk_idxd_reconfigure_chan(chan->chan); + if (rc == 0) { + chan->state = IDXD_CHANNEL_ACTIVE; + } else { + chan->state = IDXD_CHANNEL_ERROR; + } } spdk_for_each_channel_continue(i, 0); @@ -359,12 +360,18 @@ _pause_chan(struct spdk_io_channel_iter *i) { struct idxd_io_channel *chan; struct spdk_io_channel *ch; + struct spdk_idxd_device *idxd; ch = spdk_io_channel_iter_get_channel(i); chan = spdk_io_channel_get_ctx(ch); + idxd = spdk_io_channel_iter_get_ctx(i); - /* start queueing up new requests. */ - chan->state = IDXD_CHANNEL_PAUSED; + /* start queueing up new requests if this channel is on the same idxd + * device that initiated the rebalance. + */ + if (chan->dev->idxd == idxd) { + chan->state = IDXD_CHANNEL_PAUSED; + } spdk_for_each_channel_continue(i, 0); } @@ -372,7 +379,11 @@ _pause_chan(struct spdk_io_channel_iter *i) static void _pause_chan_done(struct spdk_io_channel_iter *i, int status) { - spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL); + struct spdk_idxd_device *idxd; + + idxd = spdk_io_channel_iter_get_ctx(i); + + spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL); } static int @@ -401,28 +412,30 @@ idxd_create_cb(void *io_device, void *ctx_buf) * Configure the channel but leave paused until all others * are paused and re-configured based on the new number of * channels. This enables dynamic load balancing for HW - * flow control. + * flow control. The idxd device will tell us if rebalance is + * needed based on how many channels are using it. */ - pthread_mutex_lock(&g_configuration_lock); rc = spdk_idxd_configure_chan(chan->chan); if (rc) { SPDK_ERRLOG("Failed to configure new channel rc = %d\n", rc); chan->state = IDXD_CHANNEL_ERROR; spdk_poller_unregister(&chan->poller); - pthread_mutex_unlock(&g_configuration_lock); return rc; } + if (spdk_idxd_device_needs_rebalance(chan->dev->idxd) == false) { + chan->state = IDXD_CHANNEL_ACTIVE; + return 0; + } + chan->state = IDXD_CHANNEL_PAUSED; - chan->dev->num_channels++; - pthread_mutex_unlock(&g_configuration_lock); /* * Pause all channels so that we can set proper flow control * per channel. When all are paused, we'll update the max * number of descriptors allowed per channel. */ - spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL, + spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd, _pause_chan_done); return 0; @@ -431,27 +444,31 @@ idxd_create_cb(void *io_device, void *ctx_buf) static void _pause_chan_destroy_done(struct spdk_io_channel_iter *i, int status) { - /* Rebalance the rings with the smaller number of remaining channels. */ - spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, NULL, NULL); + struct spdk_idxd_device *idxd; + + idxd = spdk_io_channel_iter_get_ctx(i); + + /* Rebalance the rings with the smaller number of remaining channels, but + * pass the idxd device along so its only done on shared channels. + */ + spdk_for_each_channel(&idxd_accel_engine, _config_max_desc, idxd, NULL); } static void idxd_destroy_cb(void *io_device, void *ctx_buf) { struct idxd_io_channel *chan = ctx_buf; - - pthread_mutex_lock(&g_configuration_lock); - assert(chan->dev->num_channels > 0); - chan->dev->num_channels--; - spdk_idxd_reconfigure_chan(chan->chan, 0); - pthread_mutex_unlock(&g_configuration_lock); + bool rebalance; spdk_poller_unregister(&chan->poller); - spdk_idxd_put_channel(chan->chan); + rebalance = spdk_idxd_put_channel(chan->chan); - /* Pause each channel then rebalance the max number of ring slots. */ - spdk_for_each_channel(&idxd_accel_engine, _pause_chan, NULL, - _pause_chan_destroy_done); + /* Only rebalance if there are still other channels on this device */ + if (rebalance == true) { + /* Pause each channel then rebalance the max number of ring slots. */ + spdk_for_each_channel(&idxd_accel_engine, _pause_chan, chan->dev->idxd, + _pause_chan_destroy_done); + } } static struct spdk_io_channel * diff --git a/test/unit/lib/idxd/idxd.c/idxd_ut.c b/test/unit/lib/idxd/idxd.c/idxd_ut.c index 661607bf70..c0617a237d 100644 --- a/test/unit/lib/idxd/idxd.c/idxd_ut.c +++ b/test/unit/lib/idxd/idxd.c/idxd_ut.c @@ -254,6 +254,7 @@ static int test_spdk_idxd_reconfigure_chan(void) { struct spdk_idxd_io_channel chan = {}; + struct spdk_idxd_device idxd = {}; int rc; uint32_t test_ring_size = 8; uint32_t num_channels = 2; @@ -263,8 +264,10 @@ test_spdk_idxd_reconfigure_chan(void) chan.completions = spdk_zmalloc(test_ring_size * sizeof(struct idxd_hw_desc), 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); SPDK_CU_ASSERT_FATAL(chan.completions != NULL); + chan.idxd = &idxd; + chan.idxd->num_channels = num_channels; - rc = spdk_idxd_reconfigure_chan(&chan, num_channels); + rc = spdk_idxd_reconfigure_chan(&chan); CU_ASSERT(rc == 0); CU_ASSERT(chan.max_ring_slots == test_ring_size / num_channels);