bdev_rbd: do final clean up on main_td

The bdev layer doesn't call the destruct callback until
all channels have been released, but because the channel
delete callback passes a message to the main thread, we can
end up with a complicated race condition.  Currently we
have a deferred_free code path to handle this race, but
we can handle this a bit more cleanly by doing the
construct operation on the main_td as well.

This also simplifies the next patch which will
asynchronously destruct the bdev to fix an RPC bug.

Here's the race:

1) first channel was created on thread A, so disk->main_td = thread A
2) second channel was created on thread B
3) first channel is freed (but disk->main_td is still thread A)
4) spdk_bdev_unregister is called on thread C
5) bdev layer gives callback on thread B to upper layer
6) upper layer on thread B frees channel
7) bdev_rbd_destroy_cb runs on thread B and has to send msg to thread A
   for processing
8) bdev layer calls bdev_rbd_destruct on thread C (since step #4 was on
   thread C)

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I25ede2dc56e24dac0919aed05b9def2560823ee7
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9158
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
This commit is contained in:
Jim Harris 2021-08-11 20:23:43 +00:00 committed by Tomasz Zawadzki
parent e71ae46797
commit a70aae4610

View File

@ -77,7 +77,6 @@ struct bdev_rbd {
uint32_t ch_count;
struct bdev_rbd_group_channel *group_ch;
bool deferred_free;
TAILQ_ENTRY(bdev_rbd) tailq;
struct spdk_poller *reset_timer;
struct spdk_bdev_io *reset_bdev_io;
@ -502,24 +501,40 @@ bdev_rbd_free_cb(void *io_device)
struct bdev_rbd *rbd = io_device;
assert(rbd != NULL);
assert(rbd->ch_count == 0);
pthread_mutex_lock(&rbd->mutex);
bdev_rbd_free(rbd);
}
if (rbd->ch_count != 0) {
rbd->deferred_free = true;
pthread_mutex_unlock(&rbd->mutex);
} else {
pthread_mutex_unlock(&rbd->mutex);
bdev_rbd_free((struct bdev_rbd *)rbd);
}
/* Final stage of bdev destruct: runs on the bdev's main thread and starts
 * io_device unregistration; bdev_rbd_free_cb performs the actual free once
 * the thread library releases the device.
 */
static void
_bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *disk = ctx;

	spdk_io_device_unregister(disk, bdev_rbd_free_cb);
}
/* bdev destruct callback.  May be invoked on any thread (whichever thread
 * called spdk_bdev_unregister), so the actual teardown is forwarded to the
 * bdev's main thread.
 *
 * Fix: the leftover direct spdk_io_device_unregister() call before the
 * message send is removed — with it present the io_device was unregistered
 * twice (once synchronously here and once in _bdev_rbd_destruct), leading
 * to a double-unregister/use-after-free.
 *
 * Returns 0 (destruct completes synchronously from the bdev layer's view).
 */
static int
bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *rbd = ctx;
	struct spdk_thread *td;

	/* main_td is only set once the first channel is created; if no channel
	 * was ever created, any thread (this one) can perform the teardown.
	 */
	if (rbd->main_td == NULL) {
		td = spdk_get_thread();
	} else {
		td = rbd->main_td;
	}

	/* Start the destruct operation on the rbd bdev's
	 * main thread.  This guarantees it will only start
	 * executing after any messages related to channel
	 * deletions have finished completing.  *Always*
	 * send a message, even if this function gets called
	 * from the main thread, in case there are pending
	 * channel delete messages in flight to this thread.
	 */
	spdk_thread_send_msg(td, _bdev_rbd_destruct, rbd);

	return 0;
}
@ -756,7 +771,6 @@ static void
_bdev_rbd_destroy_cb(void *ctx)
{
struct bdev_rbd *disk = ctx;
bool deferred_free;
pthread_mutex_lock(&disk->mutex);
assert(disk->ch_count > 0);
@ -769,15 +783,7 @@ _bdev_rbd_destroy_cb(void *ctx)
}
bdev_rbd_free_channel_resources(disk);
deferred_free = disk->deferred_free;
pthread_mutex_unlock(&disk->mutex);
/* Need to free rbd structure if there is deferred_free case
* by the bdev_rbd_destruct function */
if (deferred_free) {
bdev_rbd_free(disk);
}
}
static void