perf: Add option to display perf cores usage

Run the perf tool with the "-m" option to display real-time overall CPU usage on the cores in use.
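Example (hypothetical invocation and numbers, shown only to illustrate the
format of the new status line; the busy figure comes from the printf format
strings added below):

    ./build/examples/perf -q 32 -o 4096 -w randread -t 60 -m
       690880 IOPS,  2698.75 MiB/s  4 Core(s):  98.23% Busy

Without "-m" the status line still ends after the MiB/s figure.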

Signed-off-by: Jaylyn Ren <jaylyn.ren@arm.com>
Change-Id: I1f60315ac1e1ed1dfd238eb0a42a4b93c773e21e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6444
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Community-CI: Mellanox Build Bot
Authored by Jaylyn Ren on 2021-02-17 22:50:17 +08:00; committed by Tomasz Zawadzki
parent 83adcc14c9
commit c2a4702d7b

@@ -136,6 +136,11 @@ struct ns_worker_stats {
 	uint64_t total_tsc;
 	uint64_t min_tsc;
 	uint64_t max_tsc;
+	uint64_t last_tsc;
+	uint64_t busy_tsc;
+	uint64_t idle_tsc;
+	uint64_t last_busy_tsc;
+	uint64_t last_idle_tsc;
 };
 
 struct ns_worker_ctx {
@@ -203,7 +208,7 @@ struct ns_fn_table {
 	int	(*submit_io)(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
 			     struct ns_entry *entry, uint64_t offset_in_ios);
 
-	void	(*check_io)(struct ns_worker_ctx *ns_ctx);
+	int64_t	(*check_io)(struct ns_worker_ctx *ns_ctx);
 
 	void	(*verify_io)(struct perf_task *task, struct ns_entry *entry);
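Note: this return-type change is the hook for the whole feature. Each
backend's check_io now reports how many I/Os it reaped, so the caller can
tell a productive poll from an empty one. Restated as a sketch (comment
wording mine, not from the source):

    /* Poll for completions on one namespace context.
     * Return value: >0 = I/Os completed by this poll (busy),
     *                0 = nothing completed (idle),
     *               <0 = backend error.
     */
    int64_t	(*check_io)(struct ns_worker_ctx *ns_ctx);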
@@ -231,6 +236,8 @@ static pthread_barrier_t g_worker_sync_barrier;
 
 static uint64_t g_tsc_rate;
 
+static bool g_monitor_perf_cores = false;
+
 static uint32_t g_io_align = 0x200;
 static bool g_io_align_specified;
 static uint32_t g_io_size_bytes;
@@ -447,10 +454,10 @@ uring_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
 	return 0;
 }
 
-static void
+static int64_t
 uring_check_io(struct ns_worker_ctx *ns_ctx)
 {
-	int i, count, to_complete, to_submit, ret = 0;
+	int i, to_complete, to_submit, count = 0, ret = 0;
 	struct perf_task *task;
 
 	to_submit = ns_ctx->u.uring.io_pending;
@@ -460,7 +467,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx)
 		 * It will automatically call spdk_io_uring_enter appropriately. */
 		ret = io_uring_submit(&ns_ctx->u.uring.ring);
 		if (ret < 0) {
-			return;
+			return -1;
 		}
 		ns_ctx->u.uring.io_pending = 0;
 		ns_ctx->u.uring.io_inflight += to_submit;
@@ -481,6 +488,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx)
 			task_complete(task);
 		}
 	}
+	return count;
 }
 
 static void
@@ -580,7 +588,7 @@ aio_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
 	}
 }
 
-static void
+static int64_t
 aio_check_io(struct ns_worker_ctx *ns_ctx)
 {
 	int count, i;
@@ -598,6 +606,7 @@ aio_check_io(struct ns_worker_ctx *ns_ctx)
 	for (i = 0; i < count; i++) {
 		task_complete(ns_ctx->u.aio.events[i].data);
 	}
+	return count;
 }
 
 static void
@@ -884,7 +893,7 @@ perf_disconnect_cb(struct spdk_nvme_qpair *qpair, void *ctx)
 }
 
-static void
+static int64_t
 nvme_check_io(struct ns_worker_ctx *ns_ctx)
 {
 	int64_t rc;
 
@@ -894,6 +903,7 @@ nvme_check_io(struct ns_worker_ctx *ns_ctx)
 		fprintf(stderr, "NVMe io qpair process completion error\n");
 		exit(1);
 	}
+	return rc;
 }
 
 static void
@@ -1404,6 +1414,11 @@ print_periodic_performance(bool warmup)
 	double mb_this_second;
 	struct worker_thread *worker;
 	struct ns_worker_ctx *ns_ctx;
+	uint64_t busy_tsc;
+	uint64_t idle_tsc;
+	uint64_t core_busy_tsc = 0;
+	uint64_t core_idle_tsc = 0;
+	double core_busy_perc = 0;
 
 	if (!isatty(STDOUT_FILENO)) {
 		/* Don't print periodic stats if output is not going
@@ -1411,17 +1426,34 @@
 		 */
 		return;
 	}
 
 	io_this_second = 0;
 	TAILQ_FOREACH(worker, &g_workers, link) {
+		busy_tsc = 0;
+		idle_tsc = 0;
 		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
 			io_this_second += ns_ctx->stats.io_completed - ns_ctx->stats.last_io_completed;
 			ns_ctx->stats.last_io_completed = ns_ctx->stats.io_completed;
+			if (g_monitor_perf_cores) {
+				busy_tsc += ns_ctx->stats.busy_tsc - ns_ctx->stats.last_busy_tsc;
+				idle_tsc += ns_ctx->stats.idle_tsc - ns_ctx->stats.last_idle_tsc;
+				ns_ctx->stats.last_busy_tsc = ns_ctx->stats.busy_tsc;
+				ns_ctx->stats.last_idle_tsc = ns_ctx->stats.idle_tsc;
+			}
 		}
+		if (g_monitor_perf_cores) {
+			core_busy_tsc += busy_tsc;
+			core_idle_tsc += idle_tsc;
+			core_busy_perc += (double)core_busy_tsc / (core_idle_tsc + core_busy_tsc) * 100;
+		}
 	}
 
 	mb_this_second = (double)io_this_second * g_io_size_bytes / (1024 * 1024);
-	printf("%s%9ju IOPS, %8.2f MiB/s\r", warmup ? "[warmup] " : "", io_this_second, mb_this_second);
+	printf("%s%9ju IOPS, %8.2f MiB/s", warmup ? "[warmup] " : "", io_this_second, mb_this_second);
+	if (g_monitor_perf_cores) {
+		printf("%3d Core(s): %6.2f%% Busy", g_num_workers, core_busy_perc);
+	}
+	printf("\r");
 
 	fflush(stdout);
 }
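Note: per reporting interval the math above reduces to
busy_delta / (busy_delta + idle_delta). A self-contained C illustration of
just that formula (all numbers hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Percentage of interval ticks spent in polls that reaped completions. */
    static double
    busy_percent(uint64_t busy_tsc, uint64_t idle_tsc)
    {
    	return (double)busy_tsc / (busy_tsc + idle_tsc) * 100;
    }

    int
    main(void)
    {
    	/* 300 busy ticks vs. 100 idle ticks over the interval -> 75.00% */
    	printf("%6.2f%% Busy\n", busy_percent(300, 100));
    	return 0;
    }

Because core_busy_perc accumulates one ratio per worker, the printed value is
a sum across cores and can exceed 100% when more than one worker is active.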
@@ -1434,6 +1466,8 @@ work_fn(void *arg)
 	uint32_t unfinished_ns_ctx;
 	bool warmup = false;
 	int rc;
+	int64_t check_rc;
+	uint64_t check_now;
 
 	/* Allocate queue pairs for each namespace. */
 	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
@@ -1473,7 +1507,15 @@
 		 * to replace each I/O that is completed.
 		 */
 		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
-			ns_ctx->entry->fn_table->check_io(ns_ctx);
+			check_now = spdk_get_ticks();
+			check_rc = ns_ctx->entry->fn_table->check_io(ns_ctx);
+
+			if (check_rc > 0) {
+				ns_ctx->stats.busy_tsc += check_now - ns_ctx->stats.last_tsc;
+			} else {
+				ns_ctx->stats.idle_tsc += check_now - ns_ctx->stats.last_tsc;
+			}
+			ns_ctx->stats.last_tsc = check_now;
 		}
 
 		tsc_current = spdk_get_ticks();
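Note: the attribution scheme in this hunk is a poll-granularity heuristic:
all ticks since the previous poll are charged to busy time if the current
poll reaped at least one completion, and to idle time otherwise. A standalone
sketch of the same bookkeeping (account_poll is a hypothetical name; the real
code does this inline in work_fn):

    #include <stdint.h>
    #include <stdio.h>

    struct poll_stats {
    	uint64_t last_tsc;
    	uint64_t busy_tsc;
    	uint64_t idle_tsc;
    };

    /* Charge the ticks elapsed since the previous poll to busy or idle,
     * based on whether this poll completed any I/O. */
    static void
    account_poll(struct poll_stats *s, uint64_t now, int64_t reaped)
    {
    	if (reaped > 0) {
    		s->busy_tsc += now - s->last_tsc;
    	} else {
    		s->idle_tsc += now - s->last_tsc;
    	}
    	s->last_tsc = now;
    }

    int
    main(void)
    {
    	struct poll_stats s = { .last_tsc = 100 };

    	account_poll(&s, 130, 4); /* 30 ticks busy: 4 I/Os reaped */
    	account_poll(&s, 150, 0); /* 20 ticks idle: empty poll    */
    	printf("busy=%llu idle=%llu\n",
    	       (unsigned long long)s.busy_tsc, (unsigned long long)s.idle_tsc);
    	return 0;
    }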
@@ -1590,6 +1632,7 @@ static void usage(char *program_name)
 	printf("\t[-A IO buffer alignment. Must be power of 2 and not less than cache line (%u)]\n",
 	       SPDK_CACHE_LINE_SIZE);
 	printf("\t[-S set the default sock impl, e.g. \"posix\"]\n");
+	printf("\t[-m display real-time overall cpu usage on used cores]\n");
 #ifdef SPDK_CONFIG_URING
 	printf("\t[-R enable using liburing to drive kernel devices (Default: libaio)]\n");
 #endif
@@ -2022,7 +2065,7 @@ parse_args(int argc, char **argv)
 	int rc;
 
 	while ((op = getopt(argc, argv,
-			    "a:b:c:e:gi:lo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) {
+			    "a:b:c:e:gi:lmo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) {
 		switch (op) {
 		case 'a':
 		case 'A':
@@ -2117,6 +2160,9 @@
 		case 'l':
 			g_latency_ssd_tracking_enable = true;
 			break;
+		case 'm':
+			g_monitor_perf_cores = true;
+			break;
 		case 'r':
 			if (add_trid(optarg)) {
 				usage(argv[0]);