From 284aca9e36208dc1b2d6b4504e7797e6700de31c Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Wed, 7 Aug 2019 08:53:26 +0900 Subject: [PATCH] bdev/raid: Fix race issue among multiple threads to free RAID bdev The following issue was observed. The first thread returned the last IO channel and the second thread then removed the first base device, but raid_bdev_cleanup() was called before raid_bdev_destroy_cb() was called. raid_bdev_destroy_cb() was accessed to the raid bdev already freed by raid_bdev_cleanup() and caused segmentation fault. The call sequence was as follows: The first thread: spdk_put_io_channel() -> ch->destroy_cb -> raid_bdev_destroy_cb -> access raid bdev The second thread: raid_bdev_remove_base_devices() -> raid_bdev_deconfigure() -> spdk_bdev_unregister() -> spdk_io_device_unregister() -> spdk_bdev_destroy_cb() -> raid_bdev_destruct() -> raid_bdev_cleanup() -> free raid bdev The fix is to hold number of created channels in struct raid_bdev_io_channel and use it in raid_bdev_destroy_cb(). Bdev layer, IO device/channel layer, and NVMe-oF layer already process this case correctly. Fixes #884. Reported-by: yidong0635 Change-Id: Ie9d61bdddca479ce7f491ff9a08db45e71f16a8d Signed-off-by: yidong0635 Signed-off-by: Shuhei Matsumoto Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/463249 Reviewed-by: Broadcom SPDK FC-NVMe CI Reviewed-by: Changpeng Liu Reviewed-by: Seth Howell Reviewed-by: Ben Walker Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins --- lib/bdev/raid/bdev_raid.c | 12 ++++++------ lib/bdev/raid/bdev_raid.h | 5 ++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/bdev/raid/bdev_raid.c b/lib/bdev/raid/bdev_raid.c index 2f0aa1e49b..a5fd7790d7 100644 --- a/lib/bdev/raid/bdev_raid.c +++ b/lib/bdev/raid/bdev_raid.c @@ -96,13 +96,15 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) assert(raid_bdev != NULL); assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); - raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, + raid_ch->num_channels = raid_bdev->num_base_bdevs; + + raid_ch->base_channel = calloc(raid_ch->num_channels, sizeof(struct spdk_io_channel *)); if (!raid_ch->base_channel) { SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); return -ENOMEM; } - for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + for (uint8_t i = 0; i < raid_ch->num_channels; i++) { /* * Get the spdk_io_channel for all the base bdevs. This is used during * split logic to send the respective child bdev ios to respective base @@ -111,7 +113,7 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) raid_ch->base_channel[i] = spdk_bdev_get_io_channel( raid_bdev->base_bdev_info[i].desc); if (!raid_ch->base_channel[i]) { - for (uint32_t j = 0; j < i; j++) { + for (uint8_t j = 0; j < i; j++) { spdk_put_io_channel(raid_ch->base_channel[j]); } free(raid_ch->base_channel); @@ -138,14 +140,12 @@ static void raid_bdev_destroy_cb(void *io_device, void *ctx_buf) { struct raid_bdev_io_channel *raid_ch = ctx_buf; - struct raid_bdev *raid_bdev = io_device; SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); - assert(raid_bdev != NULL); assert(raid_ch != NULL); assert(raid_ch->base_channel); - for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + for (uint8_t i = 0; i < raid_ch->num_channels; i++) { /* Free base bdev channels */ assert(raid_ch->base_channel[i] != NULL); spdk_put_io_channel(raid_ch->base_channel[i]); diff --git a/lib/bdev/raid/bdev_raid.h b/lib/bdev/raid/bdev_raid.h index 84cff9c5ea..6fa0fe4fed 100644 --- a/lib/bdev/raid/bdev_raid.h +++ b/lib/bdev/raid/bdev_raid.h @@ -202,7 +202,10 @@ struct raid_config { */ struct raid_bdev_io_channel { /* Array of IO channels of base bdevs */ - struct spdk_io_channel **base_channel; + struct spdk_io_channel **base_channel; + + /* Number of IO channels */ + uint8_t num_channels; }; /* TAIL heads for various raid bdev lists */