From a70aae4610b04c572a45f466b299697259b9a374 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 11 Aug 2021 20:23:43 +0000 Subject: [PATCH] bdev_rbd: do final clean up on main_td The bdev layer doesn't call the destruct callback until all channels have been released, but because the channel delete callback passes message to the main thread, we can end up with a complicated race condition. Currently we have a deferred_free code path to handle this race, but we can handle this a bit more cleanly by doing the construct operation on the main_td as well. This also simplifies the next patch which will asynchronously destruct the bdev to fix an RPC bug. Here's the race: 1) first channel was created on thread A, so disk->main_td = thread A 2) second channel was created on thread B 3) first channel is freed (but disk->main_td is still thread A) 4) spdk_bdev_unregister is called on thread C 5) bdev layer gives callback on thread B to upper layer 6) upper layer on thread B frees channel 7) bdev_rbd_destroy_cb runs on thread B and has to send msg to thread A for processing 8) bdev layer calls bdev_rbd_destruct on thread C (since step #4 was on thread C) Signed-off-by: Jim Harris Change-Id: I25ede2dc56e24dac0919aed05b9def2560823ee7 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9158 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Ben Walker Reviewed-by: Ziye Yang --- module/bdev/rbd/bdev_rbd.c | 44 ++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/module/bdev/rbd/bdev_rbd.c b/module/bdev/rbd/bdev_rbd.c index 7830aa63e0..ee4e7027c5 100644 --- a/module/bdev/rbd/bdev_rbd.c +++ b/module/bdev/rbd/bdev_rbd.c @@ -77,7 +77,6 @@ struct bdev_rbd { uint32_t ch_count; struct bdev_rbd_group_channel *group_ch; - bool deferred_free; TAILQ_ENTRY(bdev_rbd) tailq; struct spdk_poller *reset_timer; struct spdk_bdev_io *reset_bdev_io; @@ -502,24 +501,40 @@ bdev_rbd_free_cb(void *io_device) struct bdev_rbd *rbd = io_device; assert(rbd != NULL); + assert(rbd->ch_count == 0); - pthread_mutex_lock(&rbd->mutex); + bdev_rbd_free(rbd); +} - if (rbd->ch_count != 0) { - rbd->deferred_free = true; - pthread_mutex_unlock(&rbd->mutex); - } else { - pthread_mutex_unlock(&rbd->mutex); - bdev_rbd_free((struct bdev_rbd *)rbd); - } +static void +_bdev_rbd_destruct(void *ctx) +{ + struct bdev_rbd *rbd = ctx; + + spdk_io_device_unregister(rbd, bdev_rbd_free_cb); } static int bdev_rbd_destruct(void *ctx) { struct bdev_rbd *rbd = ctx; + struct spdk_thread *td; - spdk_io_device_unregister(rbd, bdev_rbd_free_cb); + if (rbd->main_td == NULL) { + td = spdk_get_thread(); + } else { + td = rbd->main_td; + } + + /* Start the destruct operation on the rbd bdev's + * main thread. This guarantees it will only start + * executing after any messages related to channel + * deletions have finished completing. *Always* + * send a message, even if this function gets called + * from the main thread, in case there are pending + * channel delete messages in flight to this thread. + */ + spdk_thread_send_msg(td, _bdev_rbd_destruct, rbd); return 0; } @@ -756,7 +771,6 @@ static void _bdev_rbd_destroy_cb(void *ctx) { struct bdev_rbd *disk = ctx; - bool deferred_free; pthread_mutex_lock(&disk->mutex); assert(disk->ch_count > 0); @@ -769,15 +783,7 @@ _bdev_rbd_destroy_cb(void *ctx) } bdev_rbd_free_channel_resources(disk); - - deferred_free = disk->deferred_free; pthread_mutex_unlock(&disk->mutex); - - /* Need to free rbd structure if there is deferred_free case - * by the bdev_rbd_destruct function */ - if (deferred_free) { - bdev_rbd_free(disk); - } } static void