3cb46d40d3
This change is required to allow the branch ratio algorithm to power manage cores with no workload running on them. This is useful both when idle cores don't use C-states and for a number of hyperthreading scenarios. Signed-off-by: Rory Sexton <rory.sexton@intel.com> Reviewed-by: David Hunt <david.hunt@intel.com> Acked-by: Reshma Pattan <reshma.pattan@intel.com>
300 lines
6.6 KiB
C
300 lines
6.6 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <fcntl.h>
#include <inttypes.h>
#include <unistd.h>

#include <rte_log.h>

#include "oob_monitor.h"
#include "power_manager.h"
#include "channel_manager.h"

/* Polling keeps going while this is non-zero; branch_monitor_exit() clears it. */
static volatile unsigned run_loop = 1;

/* Branch delta stored by apply_policy() once a sample passes its skip checks. */
static uint64_t g_branches;

/* Branch-miss delta stored by apply_policy() alongside g_branches. */
static uint64_t g_branch_misses;

/* Set to 1 when the last apply_policy() call ran to completion, 0 otherwise. */
static int g_active;

void branch_monitor_exit(void)
|
|
{
|
|
run_loop = 0;
|
|
}
|
|
|
|
/* Polling period, in microseconds. */
#define INTERVAL 100

/* Number of polls that make up one second; used to pace console output. */
#define PRINT_LOOP_COUNT (1000000 / INTERVAL)

/* MSR addresses of the two event-select registers programmed by this file. */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187

/* MSR addresses of the two counters that are read back on every poll. */
#define IA32_PERFCTR0 0xc1
#define IA32_PERFCTR1 0xc2

/*
 * Event-select values written to IA32_PERFEVTSEL0/1.
 * NOTE(review): per the Intel SDM these look like the architectural
 * "branch instructions retired" (0xc4) and "branch misses retired" (0xc5)
 * events with the USR/OS/INT/EN bits set -- confirm against the SDM.
 */
#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
#define IA32_PERFEVT_BRANCH_MISS 0x05300c5

static float
|
|
apply_policy(int core)
|
|
{
|
|
struct core_info *ci;
|
|
uint64_t counter = 0;
|
|
uint64_t branches, branch_misses;
|
|
uint64_t last_branches, last_branch_misses;
|
|
int64_t hits_diff, miss_diff;
|
|
float ratio;
|
|
int ret;
|
|
int freq_window_idx, up_count = 0, i;
|
|
|
|
g_active = 0;
|
|
ci = get_core_info();
|
|
|
|
last_branches = ci->cd[core].last_branches;
|
|
last_branch_misses = ci->cd[core].last_branch_misses;
|
|
|
|
ret = pread(ci->cd[core].msr_fd, &counter,
|
|
sizeof(counter), IA32_PERFCTR0);
|
|
if (ret < 0)
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to read counter for core %u\n",
|
|
core);
|
|
branches = counter;
|
|
|
|
counter = 0;
|
|
ret = pread(ci->cd[core].msr_fd, &counter,
|
|
sizeof(counter), IA32_PERFCTR1);
|
|
if (ret < 0)
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to read counter for core %u\n",
|
|
core);
|
|
branch_misses = counter;
|
|
|
|
|
|
ci->cd[core].last_branches = branches;
|
|
ci->cd[core].last_branch_misses = branch_misses;
|
|
|
|
/*
|
|
* Intentional right shift to make MSB 0 to avoid
|
|
* possible signed overflow or truncation.
|
|
*/
|
|
branches >>= 1;
|
|
last_branches >>= 1;
|
|
hits_diff = (int64_t)branches - (int64_t)last_branches;
|
|
if (hits_diff <= 0) {
|
|
/* Likely a counter overflow condition, skip this round */
|
|
return -1.0;
|
|
}
|
|
|
|
/*
|
|
* Intentional right shift to make MSB 0 to avoid
|
|
* possible signed overflow or truncation.
|
|
*/
|
|
branch_misses >>= 1;
|
|
last_branch_misses >>= 1;
|
|
miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
|
|
if (miss_diff <= 0) {
|
|
/* Likely a counter overflow condition, skip this round */
|
|
return -1.0;
|
|
}
|
|
|
|
g_branches = hits_diff;
|
|
g_branch_misses = miss_diff;
|
|
|
|
if (hits_diff < (INTERVAL*100)) {
|
|
/* Likely no workload running on this core. */
|
|
ratio = 0.0;
|
|
} else {
|
|
ratio = (float)miss_diff * (float)100 / (float)hits_diff;
|
|
}
|
|
|
|
/*
|
|
* Store the last few directions that the ratio indicates
|
|
* we should take. If there's on 'up', then we scale up
|
|
* quickly. If all indicate 'down', only then do we scale
|
|
* down. Each core_details struct has it's own array.
|
|
*/
|
|
freq_window_idx = ci->cd[core].freq_window_idx;
|
|
if (ratio > ci->cd[core].branch_ratio_threshold)
|
|
ci->cd[core].freq_directions[freq_window_idx] = 1;
|
|
else
|
|
ci->cd[core].freq_directions[freq_window_idx] = 0;
|
|
|
|
freq_window_idx++;
|
|
freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
|
|
ci->cd[core].freq_window_idx = freq_window_idx;
|
|
|
|
up_count = 0;
|
|
for (i = 0; i < FREQ_WINDOW_SIZE; i++)
|
|
up_count += ci->cd[core].freq_directions[i];
|
|
|
|
if (up_count == 0) {
|
|
if (ci->cd[core].freq_state != FREQ_MIN) {
|
|
power_manager_scale_core_min(core);
|
|
ci->cd[core].freq_state = FREQ_MIN;
|
|
}
|
|
} else {
|
|
if (ci->cd[core].freq_state != FREQ_MAX) {
|
|
power_manager_scale_core_max(core);
|
|
ci->cd[core].freq_state = FREQ_MAX;
|
|
}
|
|
}
|
|
|
|
g_active = 1;
|
|
return ratio;
|
|
}
|
|
|
|
int
|
|
add_core_to_monitor(int core)
|
|
{
|
|
struct core_info *ci;
|
|
char proc_file[UNIX_PATH_MAX];
|
|
int ret;
|
|
|
|
ci = get_core_info();
|
|
|
|
if (core < ci->core_count) {
|
|
long setup;
|
|
|
|
snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
|
|
ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
|
|
if (ci->cd[core].msr_fd < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"Error opening MSR file for core %d "
|
|
"(is msr kernel module loaded?)\n",
|
|
core);
|
|
return -1;
|
|
}
|
|
/*
|
|
* Set up branch counters
|
|
*/
|
|
setup = IA32_PERFEVT_BRANCH_HITS;
|
|
ret = pwrite(ci->cd[core].msr_fd, &setup,
|
|
sizeof(setup), IA32_PERFEVTSEL0);
|
|
if (ret < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to set counter for core %u\n",
|
|
core);
|
|
return ret;
|
|
}
|
|
setup = IA32_PERFEVT_BRANCH_MISS;
|
|
ret = pwrite(ci->cd[core].msr_fd, &setup,
|
|
sizeof(setup), IA32_PERFEVTSEL1);
|
|
if (ret < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to set counter for core %u\n",
|
|
core);
|
|
return ret;
|
|
}
|
|
/*
|
|
* Close the file and re-open as read only so
|
|
* as not to hog the resource
|
|
*/
|
|
close(ci->cd[core].msr_fd);
|
|
ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
|
|
if (ci->cd[core].msr_fd < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"Error opening MSR file for core %d "
|
|
"(is msr kernel module loaded?)\n",
|
|
core);
|
|
return -1;
|
|
}
|
|
ci->cd[core].oob_enabled = 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
remove_core_from_monitor(int core)
|
|
{
|
|
struct core_info *ci;
|
|
char proc_file[UNIX_PATH_MAX];
|
|
int ret;
|
|
|
|
ci = get_core_info();
|
|
|
|
if (ci->cd[core].oob_enabled) {
|
|
long setup;
|
|
|
|
/*
|
|
* close the msr file, then reopen rw so we can
|
|
* disable the counters
|
|
*/
|
|
if (ci->cd[core].msr_fd != 0)
|
|
close(ci->cd[core].msr_fd);
|
|
snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
|
|
ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
|
|
if (ci->cd[core].msr_fd < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"Error opening MSR file for core %d "
|
|
"(is msr kernel module loaded?)\n",
|
|
core);
|
|
return -1;
|
|
}
|
|
setup = 0x0; /* clear event */
|
|
ret = pwrite(ci->cd[core].msr_fd, &setup,
|
|
sizeof(setup), IA32_PERFEVTSEL0);
|
|
if (ret < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to set counter for core %u\n",
|
|
core);
|
|
return ret;
|
|
}
|
|
setup = 0x0; /* clear event */
|
|
ret = pwrite(ci->cd[core].msr_fd, &setup,
|
|
sizeof(setup), IA32_PERFEVTSEL1);
|
|
if (ret < 0) {
|
|
RTE_LOG(ERR, POWER_MANAGER,
|
|
"unable to set counter for core %u\n",
|
|
core);
|
|
return ret;
|
|
}
|
|
|
|
close(ci->cd[core].msr_fd);
|
|
ci->cd[core].msr_fd = 0;
|
|
ci->cd[core].oob_enabled = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Initialise the branch monitor.
 *
 * There is nothing to set up in this implementation, so the call always
 * reports success.
 *
 * Returns 0.
 */
int
branch_monitor_init(void)
{
	return 0;
}

void
|
|
run_branch_monitor(void)
|
|
{
|
|
struct core_info *ci;
|
|
int print = 0;
|
|
float ratio;
|
|
int printed;
|
|
int reads = 0;
|
|
|
|
ci = get_core_info();
|
|
|
|
while (run_loop) {
|
|
|
|
if (!run_loop)
|
|
break;
|
|
usleep(INTERVAL);
|
|
int j;
|
|
print++;
|
|
printed = 0;
|
|
for (j = 0; j < ci->core_count; j++) {
|
|
if (ci->cd[j].oob_enabled) {
|
|
ratio = apply_policy(j);
|
|
if ((print > PRINT_LOOP_COUNT) && (g_active)) {
|
|
printf(" %d: %.4f {%lu} {%d}", j,
|
|
ratio, g_branches,
|
|
reads);
|
|
printed = 1;
|
|
reads = 0;
|
|
} else {
|
|
reads++;
|
|
}
|
|
}
|
|
}
|
|
if (print > PRINT_LOOP_COUNT) {
|
|
if (printed)
|
|
printf("\n");
|
|
print = 0;
|
|
}
|
|
}
|
|
}
|