numam-dpdk/lib/power/rte_power_empty_poll.c
Bruce Richardson 99a2dd955f lib: remove librte_ prefix from directory names
There is no reason for the DPDK libraries to all have the 'librte_' prefix on
the directory names. This prefix makes the directory names longer and also
makes it awkward to add features referring to individual libraries in the
build - should the lib names be specified with or without the prefix?
Therefore, we can just remove the library prefix and use the library's
unique name as the directory name, i.e. 'eal' rather than 'librte_eal'.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
2021-04-21 14:04:09 +02:00

543 lines
12 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2018 Intel Corporation
*/
#include <string.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <inttypes.h>
#include "rte_power.h"
#include "rte_power_empty_poll.h"
/*
 * Number of detection intervals that make up one second. Used in this file
 * as the warm-up count before training starts, as the number of consecutive
 * intervals required before a MED<->HGH transition, and to size the
 * training window.
 */
#define INTERVALS_PER_SECOND 100 /* (10ms) */
/* Multiplied by INTERVALS_PER_SECOND to obtain the training iteration count. */
#define SECONDS_TO_TRAIN_FOR 2
/* Initial value of med_to_high_threshold (percent). */
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
/* Initial value of high_to_med_threshold (percent). */
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
/* NOTE(review): not referenced in the visible part of this file. */
#define DEFAULT_CYCLES_PER_PACKET 800
/* Shared empty-poll state; allocated by rte_power_empty_poll_stat_init(). */
static struct ep_params *ep_params;
/* Copied into thresh[MED].threshold_percent when a worker enters MED_NORMAL. */
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
/* Copied into thresh[HGH].threshold_percent when a worker enters MED_NORMAL. */
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
/* Per-lcore frequency table, filled by rte_power_freqs() during init. */
static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
/* Per-lcore value returned by rte_power_freqs() during init. */
static uint32_t total_avail_freqs[RTE_MAX_LCORE];
/* Maps LOW/MED/HGH to the frequency index handed to rte_power_set_freq(). */
static uint32_t freq_index[NUM_FREQ];
/*
 * Look up the frequency index configured for a LOW/MED/HGH level in the
 * file-scope freq_index table.
 */
static uint32_t
get_freq_index(enum freq_val index)
{
	const uint32_t mapped = freq_index[index];

	return mapped;
}
/*
 * Request a frequency change for an lcore.
 *
 * When specific_freq is false, freq is a LOW/MED/HGH level and is translated
 * through get_freq_index(); when true, freq is passed through unchanged as
 * the index. Returns whatever rte_power_set_freq() returns.
 */
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	const uint32_t target = specific_freq ?
			(uint32_t)freq : get_freq_index(freq);

	return rte_power_set_freq(lcore_id, target);
}
/* Hook run when a worker leaves the TRAINING state; currently does nothing. */
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	/* Nothing to tear down; just mark the argument as used. */
	(void)(poll_stats);
}
/*
 * Put a worker into the TRAINING state: the interval counter restarts and
 * the current level is reset to LOW.
 */
static __rte_always_inline void
enter_training_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	w->queue_state = TRAINING;
	w->cur_freq = LOW;
	w->iter_counter = 0;
}
/*
 * Put a worker into the MED_NORMAL state: history bins and counters are
 * cleared, the MED frequency is requested, and the transition thresholds
 * are reloaded from the file-scope settings.
 */
static __rte_always_inline void
enter_normal_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	/* Start from an empty averaging history. */
	w->ec = 0;
	w->pc = 0;
	memset(w->edpi_av, 0, sizeof(w->edpi_av));
	memset(w->ppi_av, 0, sizeof(w->ppi_av));

	w->iter_counter = 0;
	w->threshold_ctr = 0;
	w->cur_freq = MED;
	w->queue_state = MED_NORMAL;

	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	/* The result of the frequency request is not checked here. */
	set_power_freq(w->lcore_id, MED, false);

	w->thresh[HGH].threshold_percent = high_to_med_threshold;
	w->thresh[MED].threshold_percent = med_to_high_threshold;
}
/*
 * Put a worker into the HGH_BUSY state: history bins and counters are
 * cleared and the HGH frequency is requested.
 */
static __rte_always_inline void
enter_busy_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	/* Start from an empty averaging history. */
	w->ec = 0;
	w->pc = 0;
	memset(w->edpi_av, 0, sizeof(w->edpi_av));
	memset(w->ppi_av, 0, sizeof(w->ppi_av));

	w->iter_counter = 0;
	w->threshold_ctr = 0;
	w->cur_freq = HGH;
	w->queue_state = HGH_BUSY;

	/* The result of the frequency request is not checked here. */
	set_power_freq(w->lcore_id, HGH, false);
}
/* Put a worker into the LOW_PURGE state and restart its interval counter. */
static __rte_always_inline void
enter_purge_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	w->queue_state = LOW_PURGE;
	w->iter_counter = 0;
}
static __rte_always_inline void
set_state(struct priority_worker *poll_stats,
enum queue_state new_state)
{
enum queue_state old_state = poll_stats->queue_state;
if (old_state != new_state) {
/* Call any old state exit functions */
if (old_state == TRAINING)
exit_training_state(poll_stats);
/* Call any new state entry functions */
if (new_state == TRAINING)
enter_training_state(poll_stats);
if (new_state == MED_NORMAL)
enter_normal_state(poll_stats);
if (new_state == HGH_BUSY)
enter_busy_state(poll_stats);
if (new_state == LOW_PURGE)
enter_purge_state(poll_stats);
}
}
/*
 * Apply an initial policy to a worker.
 *
 * The worker is switched to policy->state. Unless that state is TRAINING,
 * the baselines supplied in the policy are installed and the corresponding
 * thresholds are marked as trained.
 */
static __rte_always_inline void
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state != TRAINING) {
		/*
		 * NOTE(review): thresh[] is indexed with queue_state constants
		 * here while the rest of the file uses MED/HGH; presumably the
		 * values coincide - confirm against the header.
		 */
		poll_stats->thresh[MED_NORMAL].trained = true;
		poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;

		poll_stats->thresh[HGH_BUSY].trained = true;
		poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
	}
}
/*
 * Advance training for one frequency level by one step.
 *
 * Nothing happens unless freq is the worker's current level and that level
 * is still untrained. The first step requests the frequency and snapshots
 * the empty-dequeue counter; the following steps, while cur_train_iter is
 * at most max_train_iter, accumulate the per-step empty-dequeue deltas into
 * base_edpi; the final step turns the sum into an average, pads it slightly,
 * marks the level trained and moves cur_freq to the next level.
 */
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	uint64_t delta;

	if (poll_stats->cur_freq != freq || poll_stats->thresh[freq].trained)
		return;

	/* First step: switch frequency and take the starting snapshot. */
	if (poll_stats->thresh[freq].cur_train_iter == 0) {
		set_power_freq(poll_stats->lcore_id, freq, specific_freq);
		poll_stats->empty_dequeues_prev = poll_stats->empty_dequeues;
		poll_stats->thresh[freq].cur_train_iter++;
		return;
	}

	/* Sampling steps: add this step's empty-dequeue delta to the sum. */
	if (poll_stats->thresh[freq].cur_train_iter <= max_train_iter) {
		delta = poll_stats->empty_dequeues -
				poll_stats->empty_dequeues_prev;
		poll_stats->empty_dequeues_prev = poll_stats->empty_dequeues;
		poll_stats->thresh[freq].base_edpi += delta;
		poll_stats->thresh[freq].cur_train_iter++;
		return;
	}

	/* Final step: the level is known to be untrained at this point. */
	poll_stats->thresh[freq].base_edpi /= max_train_iter;
	/*
	 * Add on a factor of 0.05%; this should remove any false negatives
	 * when the system is 0% busy.
	 */
	poll_stats->thresh[freq].base_edpi +=
			poll_stats->thresh[freq].base_edpi / 2000;
	poll_stats->thresh[freq].trained = true;
	poll_stats->cur_freq++;
}
/*
 * Fold the latest interval into the worker's running averages and return a
 * busyness percentage for the current frequency level.
 *
 * edpi = empty dequeues (polls) per interval, ppi = packets per interval.
 * Both deltas are taken against the previous snapshot, which is then
 * refreshed. The edpi samples are averaged over BINS_AV bins and compared
 * with the trained baseline of the current level:
 *
 *     percent = 100 - (average edpi / base edpi) * 100
 *
 * Returns 1000 (deliberately out of the 0..100 range) when this interval's
 * edpi exceeds the baseline; in that case the sample is not recorded.
 * The caller must ensure the baseline is non-zero before calling.
 */
static __rte_always_inline uint32_t
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0;
	uint32_t j, percent;
	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

	s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
		/* Both values are uint64_t, so print them as unsigned. */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Out-of-range value so the caller rejects this interval. */
		return 1000UL;
	}

	/* Record the sample; the packet history is stored but not averaged. */
	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
	s->ppi_av[s->pc++ % BINS_AV] = ppi;

	for (j = 0; j < BINS_AV; j++)
		tot_edpi += s->edpi_av[j];

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return (uint32_t)percent;
}
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
uint32_t percent;
if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
enum freq_val cur_freq = poll_stats->cur_freq;
/* edpi mean empty poll counter difference per interval */
RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n",
cur_freq,
poll_stats->thresh[cur_freq].base_edpi);
return;
}
percent = update_stats(poll_stats);
if (percent > 100) {
/* edpi mean empty poll counter difference per interval */
RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
return;
}
if (poll_stats->cur_freq == LOW)
RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
else if (poll_stats->cur_freq == MED) {
if (percent >
poll_stats->thresh[MED].threshold_percent) {
if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
poll_stats->threshold_ctr++;
else {
set_state(poll_stats, HGH_BUSY);
RTE_LOG(INFO, POWER, "MOVE to HGH\n");
}
} else {
/* reset */
poll_stats->threshold_ctr = 0;
}
} else if (poll_stats->cur_freq == HGH) {
if (percent <
poll_stats->thresh[HGH].threshold_percent) {
if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
poll_stats->threshold_ctr++;
else {
set_state(poll_stats, MED_NORMAL);
RTE_LOG(INFO, POWER, "MOVE to MED\n");
}
} else {
/* reset */
poll_stats->threshold_ctr = 0;
}
}
}
static int
empty_poll_training(struct priority_worker *poll_stats,
uint32_t max_train_iter)
{
if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
poll_stats->iter_counter++;
return 0;
}
update_training_stats(poll_stats,
LOW,
false,
max_train_iter);
update_training_stats(poll_stats,
MED,
false,
max_train_iter);
update_training_stats(poll_stats,
HGH,
false,
max_train_iter);
if (poll_stats->thresh[LOW].trained == true
&& poll_stats->thresh[MED].trained == true
&& poll_stats->thresh[HGH].trained == true) {
set_state(poll_stats, MED_NORMAL);
RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
poll_stats->thresh[LOW].base_edpi);
RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
poll_stats->thresh[MED].base_edpi);
RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
poll_stats->thresh[HGH].base_edpi);
RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
poll_stats->lcore_id);
}
return 0;
}
void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
uint32_t i;
struct priority_worker *poll_stats;
RTE_SET_USED(tim);
RTE_SET_USED(arg);
for (i = 0; i < NUM_NODES; i++) {
poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
continue;
switch (poll_stats->queue_state) {
case(TRAINING):
empty_poll_training(poll_stats,
ep_params->max_train_iter);
break;
case(HGH_BUSY):
case(MED_NORMAL):
update_stats_normal(poll_stats);
break;
case(LOW_PURGE):
break;
default:
break;
}
}
}
/*
 * Allocate and initialise the empty-poll state.
 *
 * eptr      out: receives the allocated state on success; left untouched
 *           on failure.
 * freq_tlb  optional table indexed by LOW/MED/HGH giving the frequency
 *           index for each level; NULL selects the built-in defaults
 *           (14, 9 and 1).
 * policy    initial policy applied to every enabled lcore other than the
 *           main lcore.
 *
 * Returns 0 on success, -1 when an argument is NULL, the allocation fails,
 * or the LOW frequency index exceeds the number of frequencies reported for
 * an enabled lcore. Nothing stays allocated after a failure.
 */
int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Both pointers are dereferenced below. */
	if (eptr == NULL || policy == NULL)
		return -1;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());
	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	/* initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {
		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER,
				"total avail freq is %"PRIu32" , lcoreid %"PRIu32"\n",
				total_avail_freqs[i],
				i);

		/*
		 * NOTE(review): an index equal to the count passes this check;
		 * confirm whether '>=' is intended.
		 */
		if (get_freq_index(LOW) > total_avail_freqs[i]) {
			/* Do not leak the state or leave it half-initialised. */
			rte_free(ep_params);
			ep_params = NULL;
			return -1;
		}

		if (rte_get_main_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	/* Publish the state only once it is fully initialised. */
	*eptr = ep_params;

	return 0;
}
/*
 * Release the empty-poll state allocated by
 * rte_power_empty_poll_stat_init(). Safe to call when nothing is allocated
 * and safe to call more than once.
 */
void
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	/* rte_free() ignores NULL, so no guard is needed. */
	rte_free(ep_params);
	/* Clear the pointer so a repeated call cannot free it twice. */
	ep_params = NULL;
}
/*
 * Count one empty dequeue for the given lcore.
 *
 * Returns 0 on success, -1 when lcore_id is not below NUM_NODES. If the
 * slot's recorded lcore id is still 0 it is set to lcore_id.
 */
int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	if (lcore_id < NUM_NODES) {
		struct priority_worker *w =
				&ep_params->wrk_data.wrk_stats[lcore_id];

		if (w->lcore_id == 0)
			w->lcore_id = lcore_id;

		w->empty_dequeues++;
		return 0;
	}

	return -1;
}
/*
 * Add nb_pkt to the dequeued-packet counter of the given lcore.
 *
 * Returns 0 on success, -1 when lcore_id is not below NUM_NODES. If the
 * slot's recorded lcore id is still 0 it is set to lcore_id.
 */
int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	if (lcore_id < NUM_NODES) {
		struct priority_worker *w =
				&ep_params->wrk_data.wrk_stats[lcore_id];

		if (w->lcore_id == 0)
			w->lcore_id = lcore_id;

		w->num_dequeue_pkts += nb_pkt;
		return 0;
	}

	return -1;
}
/*
 * Read the empty-dequeue counter of the given lcore.
 *
 * When lcore_id is not below NUM_NODES the function returns -1 converted to
 * uint64_t (all bits set). If the slot's recorded lcore id is still 0 it is
 * set to lcore_id as a side effect.
 */
uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	if (lcore_id < NUM_NODES) {
		struct priority_worker *w =
				&ep_params->wrk_data.wrk_stats[lcore_id];

		if (w->lcore_id == 0)
			w->lcore_id = lcore_id;

		return w->empty_dequeues;
	}

	return (uint64_t)-1;
}
/*
 * Read the dequeued-packet counter of the given lcore.
 *
 * When lcore_id is not below NUM_NODES the function returns -1 converted to
 * uint64_t (all bits set). If the slot's recorded lcore id is still 0 it is
 * set to lcore_id as a side effect.
 */
uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	if (lcore_id < NUM_NODES) {
		struct priority_worker *w =
				&ep_params->wrk_data.wrk_stats[lcore_id];

		if (w->lcore_id == 0)
			w->lcore_id = lcore_id;

		return w->num_dequeue_pkts;
	}

	return (uint64_t)-1;
}