From 5c0160263cdc873e090d155d13af018ae283cbd5 Mon Sep 17 00:00:00 2001 From: Ziye Yang Date: Thu, 22 Apr 2021 21:51:42 +0800 Subject: [PATCH] bdev/rbd: Revise bdev_rbd_create rpc function. Revise bdev_rbd_create rpc call to add an optional parameter "--cluster-name", e.g., "--cluster-name Rados". Then users can create a rbd bdev with registered Rados Cluster. This shared strategy can be used to remove the thread creation overhead if multiple rbds are connected to the same Ceph cluster. Signed-off-by: Ziye Yang Change-Id: Ide5800f8fc6b2074805272a59731c666fe279b9a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7584 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Changpeng Liu Reviewed-by: Shuhei Matsumoto Tested-by: SPDK CI Jenkins --- CHANGELOG.md | 3 + doc/jsonrpc.md | 32 +++++++ module/bdev/rbd/bdev_rbd.c | 159 ++++++++++++++++++++++++++++++--- module/bdev/rbd/bdev_rbd.h | 2 +- module/bdev/rbd/bdev_rbd_rpc.c | 7 +- scripts/rpc.py | 4 +- scripts/rpc/bdev.py | 5 +- test/iscsi_tgt/rbd/rbd.sh | 4 +- 8 files changed, 196 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c91c9c02f..e2a41fe9f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,9 @@ New RPC `bdev_rbd_register_cluster` and `bdev_rbd_unregister_cluster` was added, and delete the rados object cluster, then users can choose the cluster to create related rbd device. +Revised `bdev_rbd_create` parameter, it allows to use an optional parameter --cluster-name +to create a rbd bdev with an already registered Rados Cluster Object. 
+ ## v21.04: ### accel diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 6c0e885140..264b0f33f0 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -3421,6 +3421,7 @@ pool_name | Required | string | Pool name rbd_name | Required | string | Image name block_size | Required | number | Block size config | Optional | string map | Explicit librados configuration +cluster_name | Optional | string | Rados cluster object name created in this module. If no config is specified, Ceph configuration files must exist with all relevant settings for accessing the pool. If a config map is @@ -3432,6 +3433,10 @@ secret key stored in Ceph keyrings) are enough. When accessing the image as some user other than "admin" (the default), the "user_id" has to be set. +If provided with cluster_name option, it will use the Rados cluster object +referenced by the name (created by bdev_rbd_register_cluster RPC) and ignores +"user_id + config" combination to create its own Rados cluster. + ### Result Name of newly created bdev. 
@@ -3468,6 +3473,33 @@ response: } ~~~ +Example request with `cluster_name`: + +~~~ +{ + "params": { + "pool_name": "rbd", + "rbd_name": "foo", + "block_size": 4096, + "cluster_name": "rbd_cluster" + }, + "jsonrpc": "2.0", + "method": "bdev_rbd_create", + "id": 1 +} +~~~ + +Example response: + +~~~ +response: +{ + "jsonrpc": "2.0", + "id": 1, + "result": "Ceph0" +} +~~~ + ## bdev_rbd_delete {#rpc_bdev_rbd_delete} Delete @ref bdev_config_rbd bdev diff --git a/module/bdev/rbd/bdev_rbd.c b/module/bdev/rbd/bdev_rbd.c index 76373b6266..ff981cb4eb 100644 --- a/module/bdev/rbd/bdev_rbd.c +++ b/module/bdev/rbd/bdev_rbd.c @@ -63,6 +63,8 @@ struct bdev_rbd { char *pool_name; char **config; rados_t cluster; + rados_t *cluster_p; + char *cluster_name; rbd_image_info_t info; TAILQ_ENTRY(bdev_rbd) tailq; struct spdk_poller *reset_timer; @@ -112,6 +114,35 @@ bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry) free(entry); } +static void +bdev_rbd_put_cluster(rados_t **cluster) +{ + struct bdev_rbd_cluster *entry; + + assert(cluster != NULL); + + /* No need to go through the map if *cluster equals NULL */ + if (*cluster == NULL) { + return; + } + + pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex); + STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) { + if (*cluster != &entry->cluster) { + continue; + } + + assert(entry->ref > 0); + entry->ref--; + *cluster = NULL; + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); + return; + } + + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); + SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster); +} + static void bdev_rbd_free(struct bdev_rbd *rbd) { @@ -119,15 +150,19 @@ bdev_rbd_free(struct bdev_rbd *rbd) return; } - if (rbd->cluster) { - rados_shutdown(rbd->cluster); - } - free(rbd->disk.name); free(rbd->rbd_name); free(rbd->user_id); free(rbd->pool_name); bdev_rbd_free_config(rbd->config); + + if (rbd->cluster_name) { + bdev_rbd_put_cluster(&rbd->cluster_p); + free(rbd->cluster_name); + } else if (rbd->cluster) { + 
rados_shutdown(rbd->cluster); + } + free(rbd); } @@ -209,11 +244,49 @@ bdev_rados_cluster_init(const char *user_id, const char *const *config, return 0; } +static int +bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster) +{ + struct bdev_rbd_cluster *entry; + + if (cluster == NULL) { + SPDK_ERRLOG("cluster should not be NULL\n"); + return -1; + } + + pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex); + STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) { + if (strncmp(cluster_name, entry->name, strlen(entry->name)) == 0) { + entry->ref++; + *cluster = &entry->cluster; + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); + return 0; + } + } + + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); + return -1; +} + +static int +bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster) +{ + int ret; + + ret = bdev_rbd_get_cluster(cluster_name, cluster); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados_t struct\n"); + return -1; + } + + return ret; +} + static void * bdev_rbd_cluster_handle(void *arg) { - struct bdev_rbd *rbd = arg; void *ret = arg; + struct bdev_rbd *rbd = arg; int rc; rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config, @@ -234,14 +307,24 @@ bdev_rbd_init(struct bdev_rbd *rbd) rados_ioctx_t io_ctx = NULL; rbd_image_t image = NULL; - /* Cluster should be created in non-SPDK thread to avoid conflict between - * Rados and SPDK thread */ - if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) { - SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd); - return -1; + if (!rbd->cluster_name) { + rbd->cluster_p = &rbd->cluster; + /* Cluster should be created in non-SPDK thread to avoid conflict between + * Rados and SPDK thread */ + if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) { + SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd); + return -1; + } + } else { + ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p); + if (ret 
< 0) { + SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n", + rbd, rbd->cluster_name); + return -1; + } } - ret = rados_ioctx_create(rbd->cluster, rbd->pool_name, &io_ctx); + ret = rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &io_ctx); if (ret < 0) { SPDK_ERRLOG("Failed to create ioctx\n"); return -1; @@ -514,9 +597,9 @@ bdev_rbd_handle(void *arg) struct bdev_rbd_io_channel *ch = arg; void *ret = arg; - assert(ch->disk->cluster != NULL); + assert(ch->disk->cluster_p != NULL); - if (rados_ioctx_create(ch->disk->cluster, ch->disk->pool_name, &ch->io_ctx) < 0) { + if (rados_ioctx_create(*(ch->disk->cluster_p), ch->disk->pool_name, &ch->io_ctx) < 0) { SPDK_ERRLOG("Failed to create ioctx\n"); ret = NULL; return ret; @@ -602,6 +685,40 @@ bdev_rbd_get_io_channel(void *ctx) return spdk_get_io_channel(rbd_bdev); } +static void +bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w) +{ + struct bdev_rbd_cluster *entry; + + pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex); + STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) { + if (strncmp(cluster_name, entry->name, strlen(entry->name))) { + continue; + } + if (entry->user_id) { + spdk_json_write_named_string(w, "user_id", entry->user_id); + } + + if (entry->config_param) { + char **config_entry = entry->config_param; + + spdk_json_write_named_object_begin(w, "config_param"); + while (*config_entry) { + spdk_json_write_named_string(w, config_entry[0], config_entry[1]); + config_entry += 2; + } + spdk_json_write_object_end(w); + } else if (entry->config_file) { + spdk_json_write_named_string(w, "config_file", entry->config_file); + } + + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); + return; + } + + pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex); +} + static int bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) { @@ -613,6 +730,11 @@ bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) spdk_json_write_named_string(w, 
"rbd_name", rbd_bdev->rbd_name); + if (rbd_bdev->cluster_name) { + bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w); + goto end; + } + if (rbd_bdev->user_id) { spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id); } @@ -628,6 +750,7 @@ bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) spdk_json_write_object_end(w); } +end: spdk_json_write_object_end(w); return 0; @@ -842,7 +965,8 @@ bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, const char *pool_name, const char *const *config, const char *rbd_name, - uint32_t block_size) + uint32_t block_size, + const char *cluster_name) { struct bdev_rbd *rbd; int ret; @@ -871,6 +995,13 @@ bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, } } + if (cluster_name) { + rbd->cluster_name = strdup(cluster_name); + if (!rbd->cluster_name) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + } rbd->pool_name = strdup(pool_name); if (!rbd->pool_name) { bdev_rbd_free(rbd); diff --git a/module/bdev/rbd/bdev_rbd.h b/module/bdev/rbd/bdev_rbd.h index 44c88d436f..4f6f0bad36 100644 --- a/module/bdev/rbd/bdev_rbd.h +++ b/module/bdev/rbd/bdev_rbd.h @@ -53,7 +53,7 @@ typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno); int bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, const char *pool_name, const char *const *config, - const char *rbd_name, uint32_t block_size); + const char *rbd_name, uint32_t block_size, const char *cluster_name); /** * Delete rbd bdev. 
* diff --git a/module/bdev/rbd/bdev_rbd_rpc.c b/module/bdev/rbd/bdev_rbd_rpc.c index 24e5042c8a..8eee4400fb 100644 --- a/module/bdev/rbd/bdev_rbd_rpc.c +++ b/module/bdev/rbd/bdev_rbd_rpc.c @@ -44,6 +44,7 @@ struct rpc_create_rbd { char *rbd_name; uint32_t block_size; char **config; + char *cluster_name; }; static void @@ -54,6 +55,7 @@ free_rpc_create_rbd(struct rpc_create_rbd *req) free(req->pool_name); free(req->rbd_name); bdev_rbd_free_config(req->config); + free(req->cluster_name); } static int @@ -104,7 +106,8 @@ static const struct spdk_json_object_decoder rpc_create_rbd_decoders[] = { {"pool_name", offsetof(struct rpc_create_rbd, pool_name), spdk_json_decode_string}, {"rbd_name", offsetof(struct rpc_create_rbd, rbd_name), spdk_json_decode_string}, {"block_size", offsetof(struct rpc_create_rbd, block_size), spdk_json_decode_uint32}, - {"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true} + {"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true}, + {"cluster_name", offsetof(struct rpc_create_rbd, cluster_name), spdk_json_decode_string, true} }; static void @@ -128,7 +131,7 @@ rpc_bdev_rbd_create(struct spdk_jsonrpc_request *request, rc = bdev_rbd_create(&bdev, req.name, req.user_id, req.pool_name, (const char *const *)req.config, req.rbd_name, - req.block_size); + req.block_size, req.cluster_name); if (rc) { spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); goto cleanup; diff --git a/scripts/rpc.py b/scripts/rpc.py index 28824ab9f2..47cb5b0878 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -676,7 +676,8 @@ if __name__ == "__main__": config=config, pool_name=args.pool_name, rbd_name=args.rbd_name, - block_size=args.block_size)) + block_size=args.block_size, + cluster_name=args.cluster_name)) p = subparsers.add_parser('bdev_rbd_create', aliases=['construct_rbd_bdev'], help='Add a bdev with ceph rbd backend') @@ -687,6 +688,7 @@ if __name__ == "__main__": p.add_argument('pool_name', 
help='rbd pool name') p.add_argument('rbd_name', help='rbd image name') p.add_argument('block_size', help='rbd block size', type=int) + p.add_argument('-c', '--cluster_name', help="cluster name to identify the Rados cluster", required=False) p.set_defaults(func=bdev_rbd_create) def bdev_rbd_delete(args): diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py index b30b5c32a8..4f75cb81a8 100644 --- a/scripts/rpc/bdev.py +++ b/scripts/rpc/bdev.py @@ -694,7 +694,7 @@ def bdev_rbd_unregister_cluster(client, name): @deprecated_alias('construct_rbd_bdev') -def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=None, config=None): +def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=None, config=None, cluster_name=None): """Create a Ceph RBD block device. Args: @@ -704,6 +704,7 @@ def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=Non name: name of block device (optional) user: Ceph user name (optional) config: map of config keys to values (optional) + cluster_name: Name to identify Rados cluster (optional) Returns: Name of created block device. 
@@ -720,6 +721,8 @@ def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=Non params['user_id'] = user if config is not None: params['config'] = config + if cluster_name is not None: + params['cluster_name'] = cluster_name return client.call('bdev_rbd_create', params) diff --git a/test/iscsi_tgt/rbd/rbd.sh b/test/iscsi_tgt/rbd/rbd.sh index 8a2a9a7021..607ef95614 100755 --- a/test/iscsi_tgt/rbd/rbd.sh +++ b/test/iscsi_tgt/rbd/rbd.sh @@ -31,7 +31,8 @@ timing_exit start_iscsi_tgt $rpc_py iscsi_create_portal_group $PORTAL_TAG $TARGET_IP:$ISCSI_PORT $rpc_py iscsi_create_initiator_group $INITIATOR_TAG $INITIATOR_NAME $NETMASK -rbd_bdev="$($rpc_py bdev_rbd_create $RBD_POOL $RBD_NAME 4096)" +rbd_cluster_name="$($rpc_py bdev_rbd_register_cluster iscsi_rbd_cluster)" +rbd_bdev="$($rpc_py bdev_rbd_create $RBD_POOL $RBD_NAME 4096 -c $rbd_cluster_name)" $rpc_py bdev_get_bdevs $rpc_py bdev_rbd_resize $rbd_bdev 2000 @@ -64,6 +65,7 @@ trap - SIGINT SIGTERM EXIT iscsicleanup $rpc_py bdev_rbd_delete $rbd_bdev +$rpc_py bdev_rbd_unregister_cluster $rbd_cluster_name killprocess $pid rbd_cleanup