spdk_top: make sure data from RPC is consistent
This patch changes how cores, threads and pollers data structures are connected to each other. This is to ensure consistency and avoid writes/reads from null pointers when application state has been altered (e.g. by rescheduling thread to another core). Fixes #1921. Signed-off-by: Krzysztof Karas <krzysztof.karas@intel.com> Change-Id: If517a7db41c512b27f86598a047df8ed27d3a274 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7769 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com> Reviewed-by: Maciej Szwed <maciej.szwed@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
a9a2c09579
commit
b096b67010
@ -628,7 +628,9 @@ get_thread_data(void)
|
||||
{
|
||||
struct spdk_jsonrpc_client_response *json_resp = NULL;
|
||||
struct rpc_threads_stats threads_stats;
|
||||
uint64_t i, j;
|
||||
struct rpc_thread_info *thread_info;
|
||||
struct rpc_core_info *core_info;
|
||||
uint64_t i, j, k;
|
||||
int rc = 0;
|
||||
|
||||
rc = rpc_send_req("thread_get_stats", &json_resp);
|
||||
@ -661,6 +663,28 @@ get_thread_data(void)
|
||||
qsort(&g_threads_stats.threads.thread_info, threads_stats.threads.threads_count,
|
||||
sizeof(g_threads_stats.threads.thread_info[0]), sort_threads);
|
||||
|
||||
for (i = 0; i < g_threads_stats.threads.threads_count; i++) {
|
||||
g_threads_stats.threads.thread_info[i].core_num = -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < g_cores_stats.cores.cores_count; i++) {
|
||||
core_info = &g_cores_stats.cores.core[i];
|
||||
|
||||
for (j = 0; j < core_info->threads.threads_count; j++) {
|
||||
for (k = 0; k < g_threads_stats.threads.threads_count; k++) {
|
||||
/* For each thread on current core: check if its ID also exists
|
||||
* in g_threads_stats data structure. If it does then assign current
|
||||
* core's number to that thread, otherwise application state is inconsistent
|
||||
* (e.g. scheduler is moving threads between cores). */
|
||||
thread_info = &g_threads_stats.threads.thread_info[k];
|
||||
if (thread_info->id == core_info->threads.thread[j].id) {
|
||||
thread_info->core_num = core_info->lcore;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&g_thread_lock);
|
||||
|
||||
end:
|
||||
@ -1141,23 +1165,34 @@ copy_pollers(struct rpc_pollers *pollers, uint64_t pollers_count, enum spdk_poll
|
||||
struct rpc_poller_info **pollers_info)
|
||||
{
|
||||
uint64_t *last_run_counter;
|
||||
uint64_t i;
|
||||
uint64_t i, j;
|
||||
struct rpc_thread_info *thread_info;
|
||||
|
||||
for (i = 0; i < pollers_count; i++) {
|
||||
if (reset_last_counter) {
|
||||
last_run_counter = get_last_run_counter(pollers->pollers[i].name, thread->id);
|
||||
if (last_run_counter == NULL) {
|
||||
store_last_run_counter(pollers->pollers[i].name, thread->id, pollers->pollers[i].run_count);
|
||||
last_run_counter = get_last_run_counter(pollers->pollers[i].name, thread->id);
|
||||
for (j = 0; j < g_threads_stats.threads.threads_count; j++) {
|
||||
thread_info = &g_threads_stats.threads.thread_info[j];
|
||||
/* Check if poller's thread exists in g_threads_stats
|
||||
* (if poller is not "hanging" without a thread). */
|
||||
if (thread_info->id != thread->id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(last_run_counter != NULL);
|
||||
*last_run_counter = pollers->pollers[i].run_count;
|
||||
if (reset_last_counter) {
|
||||
last_run_counter = get_last_run_counter(pollers->pollers[i].name, thread->id);
|
||||
if (last_run_counter == NULL) {
|
||||
store_last_run_counter(pollers->pollers[i].name, thread->id, pollers->pollers[i].run_count);
|
||||
last_run_counter = get_last_run_counter(pollers->pollers[i].name, thread->id);
|
||||
}
|
||||
|
||||
assert(last_run_counter != NULL);
|
||||
*last_run_counter = pollers->pollers[i].run_count;
|
||||
}
|
||||
pollers_info[*current_count] = &pollers->pollers[i];
|
||||
snprintf(pollers_info[*current_count]->thread_name, MAX_POLLER_NAME - 1, "%s", thread->name);
|
||||
pollers_info[*current_count]->thread_id = thread->id;
|
||||
pollers_info[(*current_count)++]->type = type;
|
||||
break;
|
||||
}
|
||||
pollers_info[*current_count] = &pollers->pollers[i];
|
||||
snprintf(pollers_info[*current_count]->thread_name, MAX_POLLER_NAME - 1, "%s", thread->name);
|
||||
pollers_info[*current_count]->thread_id = thread->id;
|
||||
pollers_info[(*current_count)++]->type = type;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2379,7 +2414,12 @@ data_thread_routine(void *arg)
|
||||
}
|
||||
pthread_mutex_unlock(&g_thread_lock);
|
||||
|
||||
/* Get data from RPC for each object type */
|
||||
/* Get data from RPC for each object type.
|
||||
* Start with cores since their number should not change. */
|
||||
rc = get_cores_data();
|
||||
if (rc) {
|
||||
print_bottom_error_message("ERROR occurred while getting cores data");
|
||||
}
|
||||
rc = get_thread_data();
|
||||
if (rc) {
|
||||
print_bottom_error_message("ERROR occurred while getting threads data");
|
||||
@ -2390,11 +2430,6 @@ data_thread_routine(void *arg)
|
||||
print_bottom_error_message("ERROR occurred while getting pollers data");
|
||||
}
|
||||
|
||||
rc = get_cores_data();
|
||||
if (rc) {
|
||||
print_bottom_error_message("ERROR occurred while getting cores data");
|
||||
}
|
||||
|
||||
usleep(g_sleep_time * SPDK_SEC_TO_USEC);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user