numam-dpdk/lib/librte_power/rte_power_empty_poll.c
Thomas Monjalon 33011cb3df replace always-inline attributes
There is a macro __rte_always_inline, forcing functions to be inlined,
which is now used where appropriate for consistency.

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
2020-04-16 18:16:46 +02:00

543 lines
12 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2018 Intel Corporation
*/
#include <string.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <inttypes.h>
#include "rte_power.h"
#include "rte_power_empty_poll.h"
/*
 * Number of detection intervals counted as one second. Used below as the
 * warm-up count before training starts, as the number of consecutive
 * threshold crossings required before a state change, and as a factor of
 * the training length.
 */
#define INTERVALS_PER_SECOND 100 /* (10ms) */
/* Multiplied by INTERVALS_PER_SECOND to obtain ep_params->max_train_iter. */
#define SECONDS_TO_TRAIN_FOR 2
/* Initial value of med_to_high_threshold (a percentage). */
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
/* Initial value of high_to_med_threshold (a percentage). */
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define DEFAULT_CYCLES_PER_PACKET 800
/* Shared context allocated by rte_power_empty_poll_stat_init(). */
static struct ep_params *ep_params;
/* Copied into thresh[MED].threshold_percent when a worker enters MED_NORMAL. */
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
/* Copied into thresh[HGH].threshold_percent when a worker enters MED_NORMAL. */
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
/* Per-lcore frequency table filled by rte_power_freqs() during init. */
static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
/* Per-lcore value returned by rte_power_freqs() during init. */
static uint32_t total_avail_freqs[RTE_MAX_LCORE];
/*
 * Frequency index to request for each logical level (LOW/MED/HGH); set
 * during init from the caller's table or from built-in defaults.
 */
static uint32_t freq_index[NUM_FREQ];
/*
 * Look up the frequency index configured for a logical frequency level
 * in the file-scope freq_index table.
 */
static uint32_t
get_freq_index(enum freq_val index)
{
	uint32_t configured_idx = freq_index[index];

	return configured_idx;
}
/*
 * Ask the power library to change the frequency of an lcore.
 *
 * When specific_freq is false, freq is a logical level that is translated
 * through get_freq_index(); otherwise the numeric value of freq is passed
 * on unchanged. The result of rte_power_set_freq() is returned as-is.
 */
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	uint32_t target_idx = specific_freq ? (uint32_t)freq
					    : get_freq_index(freq);

	return rte_power_set_freq(lcore_id, target_idx);
}
/*
 * Hook invoked by set_state() when a worker leaves the TRAINING state.
 * It currently performs no work; the argument is only marked as used.
 */
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	(void)poll_stats;
}
/*
 * Put a worker into the TRAINING state: mark the state, select LOW as the
 * current frequency level and restart the iteration counter. No frequency
 * change is requested here.
 */
static __rte_always_inline void
enter_training_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	w->queue_state = TRAINING;
	w->cur_freq = LOW;
	w->iter_counter = 0;
}
/*
 * Put a worker into the MED_NORMAL state.
 *
 * The moving-average buffers and their write cursors are cleared, the
 * counters are reset, the MED frequency is requested for the worker's lcore
 * and the percentage thresholds are reloaded from the file-scope settings.
 * The return value of set_power_freq() is not checked.
 */
static __rte_always_inline void
enter_normal_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	/* Restart both moving averages from an empty history. */
	w->ec = 0;
	w->pc = 0;
	memset(w->edpi_av, 0, sizeof(w->edpi_av));
	memset(w->ppi_av, 0, sizeof(w->ppi_av));

	w->iter_counter = 0;
	w->threshold_ctr = 0;
	w->cur_freq = MED;
	w->queue_state = MED_NORMAL;

	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(w->lcore_id, MED, false);

	w->thresh[MED].threshold_percent = med_to_high_threshold;
	w->thresh[HGH].threshold_percent = high_to_med_threshold;
}
/*
 * Put a worker into the HGH_BUSY state.
 *
 * The moving-average buffers and their write cursors are cleared, the
 * counters are reset and the HGH frequency is requested for the worker's
 * lcore. The return value of set_power_freq() is not checked.
 */
static __rte_always_inline void
enter_busy_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	/* Restart both moving averages from an empty history. */
	w->ec = 0;
	w->pc = 0;
	memset(w->edpi_av, 0, sizeof(w->edpi_av));
	memset(w->ppi_av, 0, sizeof(w->ppi_av));

	w->iter_counter = 0;
	w->threshold_ctr = 0;
	w->cur_freq = HGH;
	w->queue_state = HGH_BUSY;

	set_power_freq(w->lcore_id, HGH, false);
}
/*
 * Put a worker into the LOW_PURGE state and restart its iteration counter.
 * No frequency change is requested here.
 */
static __rte_always_inline void
enter_purge_state(struct priority_worker *poll_stats)
{
	struct priority_worker *w = poll_stats;

	w->queue_state = LOW_PURGE;
	w->iter_counter = 0;
}
static __rte_always_inline void
set_state(struct priority_worker *poll_stats,
enum queue_state new_state)
{
enum queue_state old_state = poll_stats->queue_state;
if (old_state != new_state) {
/* Call any old state exit functions */
if (old_state == TRAINING)
exit_training_state(poll_stats);
/* Call any new state entry functions */
if (new_state == TRAINING)
enter_training_state(poll_stats);
if (new_state == MED_NORMAL)
enter_normal_state(poll_stats);
if (new_state == HGH_BUSY)
enter_busy_state(poll_stats);
if (new_state == LOW_PURGE)
enter_purge_state(poll_stats);
}
}
static __rte_always_inline void
set_policy(struct priority_worker *poll_stats,
struct ep_policy *policy)
{
set_state(poll_stats, policy->state);
if (policy->state == TRAINING)
return;
poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
poll_stats->thresh[MED_NORMAL].trained = true;
poll_stats->thresh[HGH_BUSY].trained = true;
}
/*
 * Advance the training of one frequency level by one step.
 *
 * Does nothing unless freq is the worker's current level and that level is
 * still untrained. Otherwise, depending on the level's cur_train_iter:
 *  - 0: request the frequency, snapshot the empty-dequeue counter and
 *    advance the iteration count;
 *  - 1..max_train_iter: add the empty dequeues seen since the previous
 *    snapshot to base_edpi and advance the iteration count;
 *  - above max_train_iter: turn the accumulated sum into an average, pad
 *    it slightly, mark the level trained and move cur_freq to the next
 *    level.
 */
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	uint64_t interval_empty_deq;

	if (poll_stats->cur_freq != freq ||
			poll_stats->thresh[freq].trained != false)
		return;

	if (poll_stats->thresh[freq].cur_train_iter == 0) {
		/* First step: switch frequency and take the reference sample. */
		set_power_freq(poll_stats->lcore_id, freq, specific_freq);
		poll_stats->empty_dequeues_prev = poll_stats->empty_dequeues;
		poll_stats->thresh[freq].cur_train_iter++;
		return;
	}

	if (poll_stats->thresh[freq].cur_train_iter <= max_train_iter) {
		/* Sampling step: accumulate this interval's empty dequeues. */
		interval_empty_deq = poll_stats->empty_dequeues -
			poll_stats->empty_dequeues_prev;
		poll_stats->empty_dequeues_prev = poll_stats->empty_dequeues;
		poll_stats->thresh[freq].base_edpi += interval_empty_deq;
		poll_stats->thresh[freq].cur_train_iter++;
		return;
	}

	/* Final step: average over the sampled intervals. */
	poll_stats->thresh[freq].base_edpi =
		poll_stats->thresh[freq].base_edpi / max_train_iter;

	/*
	 * Add on a factor of 0.05%; this should remove any false
	 * negatives when the system is 0% busy.
	 */
	poll_stats->thresh[freq].base_edpi +=
		poll_stats->thresh[freq].base_edpi / 2000;

	poll_stats->thresh[freq].trained = true;
	poll_stats->cur_freq++;
}
/*
 * Take one interval sample for a worker and return a busyness percentage.
 *
 * The empty-dequeue and packet counters are diffed against their previous
 * snapshots (which are then refreshed). If the empty-dequeue delta exceeds
 * the trained baseline of the current frequency level, the sample is not
 * recorded and 1000 is returned so that the caller's "> 100" check rejects
 * it. Otherwise both deltas are stored in their BINS_AV-entry circular
 * buffers and the result is
 *     100 - (average empty-dequeue delta / baseline) * 100.
 *
 * The caller must make sure the baseline of the current level is non-zero.
 */
static __rte_always_inline uint32_t
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0;
	uint32_t j, percent;
	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

	s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
		/* edpi means empty poll counter difference per interval */
		/* Both values are unsigned 64-bit, so print with PRIu64. */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Out-of-range value that makes the caller discard the sample. */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
	s->ppi_av[s->pc++ % BINS_AV] = ppi;

	for (j = 0; j < BINS_AV; j++)
		tot_edpi += s->edpi_av[j];

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return (uint32_t)percent;
}
/*
 * Per-interval processing for a worker in the MED_NORMAL or HGH_BUSY state.
 *
 * The interval is skipped when the current level has no baseline or when
 * update_stats() reports a value above 100. At MED, a busyness above the
 * MED threshold must be seen on INTERVALS_PER_SECOND consecutive intervals
 * before the following one moves the worker to HGH_BUSY; at HGH, a busyness
 * below the HGH threshold must persist equally long before moving to
 * MED_NORMAL. Any interval that does not cross the threshold resets the
 * counter. At LOW only a message is logged.
 */
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;
	bool crossed;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW) {
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
		return;
	}

	/* Decide whether this interval argues for leaving the current level. */
	if (poll_stats->cur_freq == MED)
		crossed = percent > poll_stats->thresh[MED].threshold_percent;
	else if (poll_stats->cur_freq == HGH)
		crossed = percent < poll_stats->thresh[HGH].threshold_percent;
	else
		return;

	if (!crossed) {
		/* reset */
		poll_stats->threshold_ctr = 0;
		return;
	}

	if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND) {
		poll_stats->threshold_ctr++;
		return;
	}

	/* The condition held long enough: change state. */
	if (poll_stats->cur_freq == MED) {
		set_state(poll_stats, HGH_BUSY);
		RTE_LOG(INFO, POWER, "MOVE to HGH\n");
	} else {
		set_state(poll_stats, MED_NORMAL);
		RTE_LOG(INFO, POWER, "MOVE to MED\n");
	}
}
/*
 * Per-interval processing for a worker in the TRAINING state.
 *
 * The first INTERVALS_PER_SECOND calls only advance iter_counter. After
 * that, each call offers a training step to the LOW, MED and HGH levels in
 * turn. Once all three are trained the worker moves to MED_NORMAL and the
 * measured baselines are logged. Always returns 0.
 */
static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{
	bool all_trained;

	/* Warm-up period before any sampling is done. */
	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats, LOW, false, max_train_iter);
	update_training_stats(poll_stats, MED, false, max_train_iter);
	update_training_stats(poll_stats, HGH, false, max_train_iter);

	all_trained = poll_stats->thresh[LOW].trained == true
		&& poll_stats->thresh[MED].trained == true
		&& poll_stats->thresh[HGH].trained == true;

	if (!all_trained)
		return 0;

	set_state(poll_stats, MED_NORMAL);

	RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
			poll_stats->thresh[LOW].base_edpi);
	RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
			poll_stats->thresh[MED].base_edpi);
	RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
			poll_stats->thresh[HGH].base_edpi);
	RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
			poll_stats->lcore_id);

	return 0;
}
/*
 * Timer callback that runs one detection interval for every worker slot.
 *
 * Neither argument is used. Slots whose lcore_id is not an enabled lcore
 * are skipped. Workers in TRAINING take a training step, workers in
 * MED_NORMAL or HGH_BUSY take a normal statistics step, and every other
 * state (including LOW_PURGE) is left alone.
 */
void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
	uint32_t slot;
	struct priority_worker *worker;

	RTE_SET_USED(tim);
	RTE_SET_USED(arg);

	for (slot = 0; slot < NUM_NODES; slot++) {
		worker = &(ep_params->wrk_data.wrk_stats[slot]);

		if (rte_lcore_is_enabled(worker->lcore_id) == 0)
			continue;

		if (worker->queue_state == TRAINING)
			empty_poll_training(worker,
					ep_params->max_train_iter);
		else if (worker->queue_state == HGH_BUSY ||
				worker->queue_state == MED_NORMAL)
			update_stats_normal(worker);
		/* LOW_PURGE and any other state: nothing to do. */
	}
}
/*
 * Allocate and initialise the empty-poll context.
 *
 * @param eptr
 *   Receives the pointer to the allocated context. It is reset to NULL if
 *   initialisation fails after the allocation.
 * @param freq_tlb
 *   Optional table giving the frequency index for LOW, MED and HGH. When
 *   NULL the built-in defaults 14, 9 and 1 are used.
 * @param policy
 *   Initial policy applied to every enabled lcore except the master lcore.
 * @return
 *   0 on success; -1 if the allocation fails or if the LOW frequency index
 *   is larger than the number of frequencies reported for an enabled lcore.
 */
int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());
	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {
		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		/* Both values are uint32_t, hence %u. */
		RTE_LOG(INFO, POWER, "total avail freq is %u , lcoreid %u\n",
				total_avail_freqs[i],
				i);

		/*
		 * NOTE(review): an index equal to the count passes this check;
		 * confirm whether ">=" is the intended comparison.
		 */
		if (get_freq_index(LOW) > total_avail_freqs[i]) {
			/*
			 * Release the context so the failure path neither
			 * leaks it nor leaves stale pointers behind.
			 */
			rte_free(ep_params);
			ep_params = NULL;
			*eptr = NULL;
			return -1;
		}

		if (rte_get_master_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}
/*
 * Release the empty-poll context allocated by
 * rte_power_empty_poll_stat_init().
 *
 * The file-scope pointer is cleared afterwards so that a second call does
 * not free the same memory again. rte_free() accepts NULL, so no guard is
 * needed when nothing was allocated.
 */
void
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	rte_free(ep_params);
	ep_params = NULL;
}
/*
 * Record one empty poll for the given lcore.
 *
 * If the slot's stored lcore_id is still 0 it is set to lcore_id first.
 *
 * @return 0 on success, -1 if lcore_id is not below NUM_NODES.
 */
int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *worker;

	if (lcore_id < NUM_NODES) {
		worker = &(ep_params->wrk_data.wrk_stats[lcore_id]);

		if (worker->lcore_id == 0)
			worker->lcore_id = lcore_id;

		worker->empty_dequeues++;
		return 0;
	}

	return -1;
}
/*
 * Add nb_pkt to the dequeued-packet counter of the given lcore.
 *
 * If the slot's stored lcore_id is still 0 it is set to lcore_id first.
 *
 * @return 0 on success, -1 if lcore_id is not below NUM_NODES.
 */
int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	struct priority_worker *worker;

	if (lcore_id < NUM_NODES) {
		worker = &(ep_params->wrk_data.wrk_stats[lcore_id]);

		if (worker->lcore_id == 0)
			worker->lcore_id = lcore_id;

		worker->num_dequeue_pkts += nb_pkt;
		return 0;
	}

	return -1;
}
/*
 * Read the empty-poll counter of the given lcore.
 *
 * If the slot's stored lcore_id is still 0 it is set to lcore_id as a side
 * effect.
 *
 * @return The counter value. When lcore_id is not below NUM_NODES, -1
 *   converted to uint64_t (all bits set) is returned instead.
 */
uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *worker;

	if (lcore_id >= NUM_NODES)
		return (uint64_t)-1;

	worker = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (worker->lcore_id == 0)
		worker->lcore_id = lcore_id;

	return worker->empty_dequeues;
}
/*
 * Read the dequeued-packet counter of the given lcore.
 *
 * If the slot's stored lcore_id is still 0 it is set to lcore_id as a side
 * effect.
 *
 * @return The counter value. When lcore_id is not below NUM_NODES, -1
 *   converted to uint64_t (all bits set) is returned instead.
 */
uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *worker;

	if (lcore_id >= NUM_NODES)
		return (uint64_t)-1;

	worker = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (worker->lcore_id == 0)
		worker->lcore_id = lcore_id;

	return worker->num_dequeue_pkts;
}