numam-dpdk/lib/librte_power/rte_power_empty_poll.c
commit cfe3aeb170 by David Marchand: remove experimental tags from all symbol definitions
We had some inconsistencies between function prototypes and actual
definitions.
Let's avoid this by adding the experimental tag only to the prototypes.
Tests with gcc and clang show this is enough.

git grep -l __rte_experimental |grep \.c$ |while read file; do
	sed -i -e '/^__rte_experimental$/d' $file;
	sed -i -e 's/  *__rte_experimental//' $file;
	sed -i -e 's/__rte_experimental  *//' $file;
done

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
2019-06-29 19:04:43 +02:00
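
For illustration, the convention this commit enforces looks like the
following (hypothetical function name, not from this commit): the tag
stays on the prototype in the header, and the definition in the .c file
is left untagged.

	/* some_lib.h: the prototype keeps the tag */
	__rte_experimental
	int rte_foo_do_thing(int x);

	/* some_lib.c: the definition carries no tag */
	int
	rte_foo_do_thing(int x)
	{
		return x;
	}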


/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <string.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <inttypes.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100 /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800

static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

static uint32_t
get_freq_index(enum freq_val index)
{
	return freq_index[index];
}

static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	int err = 0;
	uint32_t power_freq_index;

	if (!specific_freq)
		power_freq_index = get_freq_index(freq);
	else
		power_freq_index = freq;

	err = rte_power_set_freq(lcore_id, power_freq_index);

	return err;
}
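
/*
 * Example of the mapping above, using the defaults installed by
 * rte_power_empty_poll_stat_init() when no freq_tlb is given (the power
 * library's per-lcore frequency table is ordered from highest to lowest
 * frequency, so a lower index means a higher frequency):
 *
 *	set_power_freq(lcore, LOW, false)  ->  rte_power_set_freq(lcore, 14)
 *	set_power_freq(lcore, MED, false)  ->  rte_power_set_freq(lcore, 9)
 *	set_power_freq(lcore, HGH, false)  ->  rte_power_set_freq(lcore, 1)
 *	set_power_freq(lcore, 3, true)     ->  rte_power_set_freq(lcore, 3)
 */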

static inline void __attribute__((always_inline))
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}

static inline void __attribute__((always_inline))
enter_training_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->cur_freq = LOW;
	poll_stats->queue_state = TRAINING;
}

static inline void __attribute__((always_inline))
enter_normal_state(struct priority_worker *poll_stats)
{
	/* Clear the averages arrays and counters */
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = MED;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = MED_NORMAL;
	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(poll_stats->lcore_id, MED, false);

	poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
	poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

static inline void __attribute__((always_inline))
enter_busy_state(struct priority_worker *poll_stats)
{
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = HGH;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = HGH_BUSY;
	set_power_freq(poll_stats->lcore_id, HGH, false);
}

static inline void __attribute__((always_inline))
enter_purge_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->queue_state = LOW_PURGE;
}

static inline void __attribute__((always_inline))
set_state(struct priority_worker *poll_stats,
		enum queue_state new_state)
{
	enum queue_state old_state = poll_stats->queue_state;

	if (old_state != new_state) {
		/* Call any old state exit functions */
		if (old_state == TRAINING)
			exit_training_state(poll_stats);

		/* Call any new state entry functions */
		if (new_state == TRAINING)
			enter_training_state(poll_stats);
		if (new_state == MED_NORMAL)
			enter_normal_state(poll_stats);
		if (new_state == HGH_BUSY)
			enter_busy_state(poll_stats);
		if (new_state == LOW_PURGE)
			enter_purge_state(poll_stats);
	}
}
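
/*
 * Summary of the state machine driven by set_state(), as implemented by
 * the entry handlers above: TRAINING measures a per-frequency baseline
 * of empty polls, then empty_poll_training() moves the worker to
 * MED_NORMAL; update_stats_normal() toggles MED_NORMAL <-> HGH_BUSY once
 * the measured busyness stays past the configured threshold for a full
 * second; LOW_PURGE is a parked state (purge mode is not currently
 * supported).
 */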

static inline void __attribute__((always_inline))
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state == TRAINING)
		return;

	poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
	poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

	poll_stats->thresh[MED_NORMAL].trained = true;
	poll_stats->thresh[HGH_BUSY].trained = true;
}

static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {
			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);
			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;
			poll_stats->thresh[freq].cur_train_iter++;
			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {
			p0_empty_deq = poll_stats->empty_dequeues -
					poll_stats->empty_dequeues_prev;
			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;
			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;
		} else {
			if (poll_stats->thresh[freq].trained == false) {
				poll_stats->thresh[freq].base_edpi =
						poll_stats->thresh[freq].base_edpi /
						max_train_iter;

				/* Add on a factor of 0.05%; this should
				 * remove any false negatives when the
				 * system is 0% busy.
				 */
				poll_stats->thresh[freq].base_edpi +=
						poll_stats->thresh[freq].base_edpi / 2000;

				poll_stats->thresh[freq].trained = true;
				poll_stats->cur_freq++;
			}
		}
	}
}
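
/*
 * Training arithmetic, with assumed numbers for illustration: with the
 * defaults above, max_train_iter = INTERVALS_PER_SECOND *
 * SECONDS_TO_TRAIN_FOR = 200 intervals. The empty-poll deltas observed
 * at each frequency are summed into base_edpi and then divided by 200
 * to get a per-interval average; the final "/ 2000" adds 0.05% of
 * headroom, so an average of 2,000,000 empty polls per interval becomes
 * a baseline of 2,001,000.
 */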

static inline uint32_t __attribute__((always_inline))
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0, tot_ppi = 0;
	uint32_t j, percent;

	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

	s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Return a value guaranteed to fail the percent check */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
	s->ppi_av[s->pc++ % BINS_AV] = ppi;

	for (j = 0; j < BINS_AV; j++) {
		tot_edpi += s->edpi_av[j];
		tot_ppi += s->ppi_av[j];
	}

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return (uint32_t)percent;
}
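
/*
 * Worked example for the percent calculation above, with assumed
 * numbers: if the trained base_edpi at the current frequency is 10000
 * empty polls per interval and the BINS_AV rolling average of recent
 * intervals is 3000, then percent = 100 - (3000 / 10000) * 100 = 70,
 * i.e. the core is treated as roughly 70% busy, since a busy core comes
 * back empty on fewer of its polls.
 */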

static inline void __attribute__((always_inline))
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {
		if (percent >
				poll_stats->thresh[MED].threshold_percent) {
			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}
		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}
	} else if (poll_stats->cur_freq == HGH) {
		if (percent <
				poll_stats->thresh[HGH].threshold_percent) {
			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}
	}
}

static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{
	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats,
			LOW,
			false,
			max_train_iter);
	update_training_stats(poll_stats,
			MED,
			false,
			max_train_iter);
	update_training_stats(poll_stats,
			HGH,
			false,
			max_train_iter);

	if (poll_stats->thresh[LOW].trained == true
			&& poll_stats->thresh[MED].trained == true
			&& poll_stats->thresh[HGH].trained == true) {
		set_state(poll_stats, MED_NORMAL);

		RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
				poll_stats->thresh[LOW].base_edpi);
		RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
				poll_stats->thresh[MED].base_edpi);
		RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
				poll_stats->thresh[HGH].base_edpi);

		RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
				poll_stats->lcore_id);
	}

	return 0;
}

void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
	uint32_t i;
	struct priority_worker *poll_stats;

	RTE_SET_USED(tim);
	RTE_SET_USED(arg);

	for (i = 0; i < NUM_NODES; i++) {
		poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

		if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
			continue;

		switch (poll_stats->queue_state) {
		case(TRAINING):
			empty_poll_training(poll_stats,
					ep_params->max_train_iter);
			break;
		case(HGH_BUSY):
		case(MED_NORMAL):
			update_stats_normal(poll_stats);
			break;
		case(LOW_PURGE):
			break;
		default:
			break;
		}
	}
}

int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());
	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for a pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {
		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER, "total avail freqs is %d, lcore id %d\n",
				total_avail_freqs[i],
				i);

		if (get_freq_index(LOW) > total_avail_freqs[i])
			return -1;

		if (rte_get_master_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}
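
/*
 * Usage sketch (illustrative, not taken from this file: the variable
 * names and the timer setup are assumptions; the five calls are the
 * public API defined here). An application initializes once, drives
 * detection from a periodic ~10ms callback (e.g. an rte_timer), and
 * reports each poll result from its worker RX loops:
 *
 *	struct ep_params *eptr;
 *	struct ep_policy policy = { .state = TRAINING };
 *
 *	rte_power_empty_poll_stat_init(&eptr, NULL, &policy);
 *
 *	// timer callback, fired every ~10ms:
 *	rte_empty_poll_detection(tim, arg);
 *
 *	// worker RX loop:
 *	if (nb_rx == 0)
 *		rte_power_empty_poll_stat_update(lcore_id);
 *	else
 *		rte_power_poll_stat_update(lcore_id, nb_rx);
 *
 *	rte_power_empty_poll_stat_free();
 */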

void
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	if (ep_params != NULL)
		rte_free(ep_params);
}

int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->empty_dequeues++;

	return 0;
}

int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->num_dequeue_pkts += nb_pkt;

	return 0;
}

uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->empty_dequeues;
}

uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->num_dequeue_pkts;
}