blob: avoid recursion when split IO immediately completes

In some scenarios, a split IO can immediately complete.  For
example, a very large unmap operation to a newly thin-provisioned
blob has no operations to perform, so the batch for its operation
immediately completes.

But if it immediately completes, we can't recursively submit
the next split IO.  So use variables in the context structure
to detect when an operation immediately completes, to allow
it to unwind and submit the next operation without recursing.

Fixes issue #2347.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I8e4c121190c7d08152aa8de20cf6abc55b5edc46
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11388
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
This commit is contained in:
Jim Harris 2022-02-02 16:41:57 +00:00
parent b6992a90d3
commit c5d80a8bc3

View File

@ -2533,6 +2533,9 @@ struct op_split_ctx {
void *curr_payload;
enum spdk_blob_op_type op_type;
spdk_bs_sequence_t *seq;
bool in_submit_ctx;
bool completed_in_submit_ctx;
bool done;
};
static void
@ -2542,18 +2545,40 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
struct spdk_blob *blob = ctx->blob;
struct spdk_io_channel *ch = ctx->channel;
enum spdk_blob_op_type op_type = ctx->op_type;
uint8_t *buf = ctx->curr_payload;
uint64_t offset = ctx->io_unit_offset;
uint64_t length = ctx->io_units_remaining;
uint8_t *buf;
uint64_t offset;
uint64_t length;
uint64_t op_length;
if (bserrno != 0 || ctx->io_units_remaining == 0) {
bs_sequence_finish(ctx->seq, bserrno);
free(ctx);
if (ctx->in_submit_ctx) {
/* Defer freeing of the ctx object, since it will be
* accessed when this unwinds back to the submission
* context.
*/
ctx->done = true;
} else {
free(ctx);
}
return;
}
do {
if (ctx->in_submit_ctx) {
/* If this split operation completed in the context
* of its submission, mark the flag and return immediately
* to avoid recursion.
*/
ctx->completed_in_submit_ctx = true;
return;
}
while (true) {
ctx->completed_in_submit_ctx = false;
offset = ctx->io_unit_offset;
length = ctx->io_units_remaining;
buf = ctx->curr_payload;
op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
offset));
@ -2564,6 +2589,9 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
ctx->curr_payload += op_length * blob->bs->io_unit_size;
}
assert(!ctx->in_submit_ctx);
ctx->in_submit_ctx = true;
switch (op_type) {
case SPDK_BLOB_READ:
spdk_blob_io_read(blob, ch, buf, offset, op_length,
@ -2586,9 +2614,33 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
SPDK_ERRLOG("readv/write not valid\n");
bs_sequence_finish(ctx->seq, -EINVAL);
free(ctx);
break;
return;
}
} while (false);
#ifndef __clang_analyzer__
/* scan-build reports a false positive around accessing the ctx here. It
* forms a path that recursively calls this function, but then says
* "assuming ctx->in_submit_ctx is false", when that isn't possible.
* This path does free(ctx), returns to here, and reports a use-after-free
* bug. Wrapping this bit of code so that scan-build doesn't see it
* works around the scan-build bug.
*/
assert(ctx->in_submit_ctx);
ctx->in_submit_ctx = false;
/* If the operation completed immediately, loop back and submit the
* next operation. Otherwise we can return and the next split
* operation will get submitted when this current operation is
* later completed asynchronously.
*/
if (ctx->completed_in_submit_ctx) {
continue;
} else if (ctx->done) {
free(ctx);
}
#endif
break;
}
}
static void