99a2dd955f
There is no reason for the DPDK libraries to all have 'librte_' prefix on the directory names. This prefix makes the directory names longer and also makes it awkward to add features referring to individual libraries in the build - should the lib names be specified with or without the prefix. Therefore, we can just remove the library prefix and use the library's unique name as the directory name, i.e. 'eal' rather than 'librte_eal' Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
543 lines
12 KiB
C
543 lines
12 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(c) 2010-2018 Intel Corporation
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#include <rte_lcore.h>
|
|
#include <rte_cycles.h>
|
|
#include <rte_atomic.h>
|
|
#include <rte_malloc.h>
|
|
#include <inttypes.h>
|
|
|
|
#include "rte_power.h"
|
|
#include "rte_power_empty_poll.h"
|
|
|
|
#define INTERVALS_PER_SECOND 100 /* (10ms) */
|
|
#define SECONDS_TO_TRAIN_FOR 2
|
|
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
|
|
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
|
|
#define DEFAULT_CYCLES_PER_PACKET 800
|
|
|
|
static struct ep_params *ep_params;
|
|
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
|
|
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
|
|
|
|
static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
|
|
|
|
static uint32_t total_avail_freqs[RTE_MAX_LCORE];
|
|
|
|
static uint32_t freq_index[NUM_FREQ];
|
|
|
|
static uint32_t
|
|
get_freq_index(enum freq_val index)
|
|
{
|
|
return freq_index[index];
|
|
}
|
|
|
|
|
|
static int
|
|
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
|
|
{
|
|
int err = 0;
|
|
uint32_t power_freq_index;
|
|
if (!specific_freq)
|
|
power_freq_index = get_freq_index(freq);
|
|
else
|
|
power_freq_index = freq;
|
|
|
|
err = rte_power_set_freq(lcore_id, power_freq_index);
|
|
|
|
return err;
|
|
}
|
|
|
|
|
|
/*
 * Exit hook for the TRAINING state, called from set_state().
 * There is currently nothing to undo, so the argument is simply ignored.
 */
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	(void)(poll_stats);
}
|
|
|
|
static __rte_always_inline void
|
|
enter_training_state(struct priority_worker *poll_stats)
|
|
{
|
|
poll_stats->iter_counter = 0;
|
|
poll_stats->cur_freq = LOW;
|
|
poll_stats->queue_state = TRAINING;
|
|
}
|
|
|
|
static __rte_always_inline void
|
|
enter_normal_state(struct priority_worker *poll_stats)
|
|
{
|
|
/* Clear the averages arrays and strs */
|
|
memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
|
|
poll_stats->ec = 0;
|
|
memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
|
|
poll_stats->pc = 0;
|
|
|
|
poll_stats->cur_freq = MED;
|
|
poll_stats->iter_counter = 0;
|
|
poll_stats->threshold_ctr = 0;
|
|
poll_stats->queue_state = MED_NORMAL;
|
|
RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
|
|
set_power_freq(poll_stats->lcore_id, MED, false);
|
|
|
|
poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
|
|
poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
|
|
}
|
|
|
|
static __rte_always_inline void
|
|
enter_busy_state(struct priority_worker *poll_stats)
|
|
{
|
|
memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
|
|
poll_stats->ec = 0;
|
|
memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
|
|
poll_stats->pc = 0;
|
|
|
|
poll_stats->cur_freq = HGH;
|
|
poll_stats->iter_counter = 0;
|
|
poll_stats->threshold_ctr = 0;
|
|
poll_stats->queue_state = HGH_BUSY;
|
|
set_power_freq(poll_stats->lcore_id, HGH, false);
|
|
}
|
|
|
|
static __rte_always_inline void
|
|
enter_purge_state(struct priority_worker *poll_stats)
|
|
{
|
|
poll_stats->iter_counter = 0;
|
|
poll_stats->queue_state = LOW_PURGE;
|
|
}
|
|
|
|
static __rte_always_inline void
|
|
set_state(struct priority_worker *poll_stats,
|
|
enum queue_state new_state)
|
|
{
|
|
enum queue_state old_state = poll_stats->queue_state;
|
|
if (old_state != new_state) {
|
|
|
|
/* Call any old state exit functions */
|
|
if (old_state == TRAINING)
|
|
exit_training_state(poll_stats);
|
|
|
|
/* Call any new state entry functions */
|
|
if (new_state == TRAINING)
|
|
enter_training_state(poll_stats);
|
|
if (new_state == MED_NORMAL)
|
|
enter_normal_state(poll_stats);
|
|
if (new_state == HGH_BUSY)
|
|
enter_busy_state(poll_stats);
|
|
if (new_state == LOW_PURGE)
|
|
enter_purge_state(poll_stats);
|
|
}
|
|
}
|
|
|
|
static __rte_always_inline void
|
|
set_policy(struct priority_worker *poll_stats,
|
|
struct ep_policy *policy)
|
|
{
|
|
set_state(poll_stats, policy->state);
|
|
|
|
if (policy->state == TRAINING)
|
|
return;
|
|
|
|
poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
|
|
poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
|
|
|
|
poll_stats->thresh[MED_NORMAL].trained = true;
|
|
poll_stats->thresh[HGH_BUSY].trained = true;
|
|
|
|
}
|
|
|
|
static void
|
|
update_training_stats(struct priority_worker *poll_stats,
|
|
uint32_t freq,
|
|
bool specific_freq,
|
|
uint32_t max_train_iter)
|
|
{
|
|
RTE_SET_USED(specific_freq);
|
|
|
|
uint64_t p0_empty_deq;
|
|
|
|
if (poll_stats->cur_freq == freq &&
|
|
poll_stats->thresh[freq].trained == false) {
|
|
if (poll_stats->thresh[freq].cur_train_iter == 0) {
|
|
|
|
set_power_freq(poll_stats->lcore_id,
|
|
freq, specific_freq);
|
|
|
|
poll_stats->empty_dequeues_prev =
|
|
poll_stats->empty_dequeues;
|
|
|
|
poll_stats->thresh[freq].cur_train_iter++;
|
|
|
|
return;
|
|
} else if (poll_stats->thresh[freq].cur_train_iter
|
|
<= max_train_iter) {
|
|
|
|
p0_empty_deq = poll_stats->empty_dequeues -
|
|
poll_stats->empty_dequeues_prev;
|
|
|
|
poll_stats->empty_dequeues_prev =
|
|
poll_stats->empty_dequeues;
|
|
|
|
poll_stats->thresh[freq].base_edpi += p0_empty_deq;
|
|
poll_stats->thresh[freq].cur_train_iter++;
|
|
|
|
} else {
|
|
if (poll_stats->thresh[freq].trained == false) {
|
|
poll_stats->thresh[freq].base_edpi =
|
|
poll_stats->thresh[freq].base_edpi /
|
|
max_train_iter;
|
|
|
|
/* Add on a factor of 0.05%
|
|
* this should remove any
|
|
* false negatives when the system is 0% busy
|
|
*/
|
|
poll_stats->thresh[freq].base_edpi +=
|
|
poll_stats->thresh[freq].base_edpi / 2000;
|
|
|
|
poll_stats->thresh[freq].trained = true;
|
|
poll_stats->cur_freq++;
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static __rte_always_inline uint32_t
|
|
update_stats(struct priority_worker *poll_stats)
|
|
{
|
|
uint64_t tot_edpi = 0, tot_ppi = 0;
|
|
uint32_t j, percent;
|
|
|
|
struct priority_worker *s = poll_stats;
|
|
|
|
uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;
|
|
|
|
s->empty_dequeues_prev = s->empty_dequeues;
|
|
|
|
uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;
|
|
|
|
s->num_dequeue_pkts_prev = s->num_dequeue_pkts;
|
|
|
|
if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
|
|
|
|
/* edpi mean empty poll counter difference per interval */
|
|
RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
|
|
"cur edpi %"PRId64" "
|
|
"base edpi %"PRId64"\n",
|
|
cur_edpi,
|
|
s->thresh[s->cur_freq].base_edpi);
|
|
/* Value to make us fail need debug log*/
|
|
return 1000UL;
|
|
}
|
|
|
|
s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
|
|
s->ppi_av[s->pc++ % BINS_AV] = ppi;
|
|
|
|
for (j = 0; j < BINS_AV; j++) {
|
|
tot_edpi += s->edpi_av[j];
|
|
tot_ppi += s->ppi_av[j];
|
|
}
|
|
|
|
tot_edpi = tot_edpi / BINS_AV;
|
|
|
|
percent = 100 - (uint32_t)(((float)tot_edpi /
|
|
(float)s->thresh[s->cur_freq].base_edpi) * 100);
|
|
|
|
return (uint32_t)percent;
|
|
}
|
|
|
|
|
|
static __rte_always_inline void
|
|
update_stats_normal(struct priority_worker *poll_stats)
|
|
{
|
|
uint32_t percent;
|
|
|
|
if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
|
|
|
|
enum freq_val cur_freq = poll_stats->cur_freq;
|
|
|
|
/* edpi mean empty poll counter difference per interval */
|
|
RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n",
|
|
cur_freq,
|
|
poll_stats->thresh[cur_freq].base_edpi);
|
|
return;
|
|
}
|
|
|
|
percent = update_stats(poll_stats);
|
|
|
|
if (percent > 100) {
|
|
/* edpi mean empty poll counter difference per interval */
|
|
RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
|
|
return;
|
|
}
|
|
|
|
if (poll_stats->cur_freq == LOW)
|
|
RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
|
|
else if (poll_stats->cur_freq == MED) {
|
|
|
|
if (percent >
|
|
poll_stats->thresh[MED].threshold_percent) {
|
|
|
|
if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
|
|
poll_stats->threshold_ctr++;
|
|
else {
|
|
set_state(poll_stats, HGH_BUSY);
|
|
RTE_LOG(INFO, POWER, "MOVE to HGH\n");
|
|
}
|
|
|
|
} else {
|
|
/* reset */
|
|
poll_stats->threshold_ctr = 0;
|
|
}
|
|
|
|
} else if (poll_stats->cur_freq == HGH) {
|
|
|
|
if (percent <
|
|
poll_stats->thresh[HGH].threshold_percent) {
|
|
|
|
if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
|
|
poll_stats->threshold_ctr++;
|
|
else {
|
|
set_state(poll_stats, MED_NORMAL);
|
|
RTE_LOG(INFO, POWER, "MOVE to MED\n");
|
|
}
|
|
} else {
|
|
/* reset */
|
|
poll_stats->threshold_ctr = 0;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
static int
|
|
empty_poll_training(struct priority_worker *poll_stats,
|
|
uint32_t max_train_iter)
|
|
{
|
|
|
|
if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
|
|
poll_stats->iter_counter++;
|
|
return 0;
|
|
}
|
|
|
|
|
|
update_training_stats(poll_stats,
|
|
LOW,
|
|
false,
|
|
max_train_iter);
|
|
|
|
update_training_stats(poll_stats,
|
|
MED,
|
|
false,
|
|
max_train_iter);
|
|
|
|
update_training_stats(poll_stats,
|
|
HGH,
|
|
false,
|
|
max_train_iter);
|
|
|
|
|
|
if (poll_stats->thresh[LOW].trained == true
|
|
&& poll_stats->thresh[MED].trained == true
|
|
&& poll_stats->thresh[HGH].trained == true) {
|
|
|
|
set_state(poll_stats, MED_NORMAL);
|
|
|
|
RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
|
|
poll_stats->thresh[LOW].base_edpi);
|
|
|
|
RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
|
|
poll_stats->thresh[MED].base_edpi);
|
|
|
|
|
|
RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
|
|
poll_stats->thresh[HGH].base_edpi);
|
|
|
|
RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
|
|
poll_stats->lcore_id);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
|
|
{
|
|
|
|
uint32_t i;
|
|
|
|
struct priority_worker *poll_stats;
|
|
|
|
RTE_SET_USED(tim);
|
|
|
|
RTE_SET_USED(arg);
|
|
|
|
for (i = 0; i < NUM_NODES; i++) {
|
|
|
|
poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
|
|
|
|
if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
|
|
continue;
|
|
|
|
switch (poll_stats->queue_state) {
|
|
case(TRAINING):
|
|
empty_poll_training(poll_stats,
|
|
ep_params->max_train_iter);
|
|
break;
|
|
|
|
case(HGH_BUSY):
|
|
case(MED_NORMAL):
|
|
update_stats_normal(poll_stats);
|
|
break;
|
|
|
|
case(LOW_PURGE):
|
|
break;
|
|
default:
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
int
|
|
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
|
|
struct ep_policy *policy)
|
|
{
|
|
uint32_t i;
|
|
/* Allocate the ep_params structure */
|
|
ep_params = rte_zmalloc_socket(NULL,
|
|
sizeof(struct ep_params),
|
|
0,
|
|
rte_socket_id());
|
|
|
|
if (!ep_params)
|
|
return -1;
|
|
|
|
if (freq_tlb == NULL) {
|
|
freq_index[LOW] = 14;
|
|
freq_index[MED] = 9;
|
|
freq_index[HGH] = 1;
|
|
} else {
|
|
freq_index[LOW] = freq_tlb[LOW];
|
|
freq_index[MED] = freq_tlb[MED];
|
|
freq_index[HGH] = freq_tlb[HGH];
|
|
}
|
|
|
|
RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");
|
|
|
|
/* Train for pre-defined period */
|
|
ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;
|
|
|
|
struct stats_data *w = &ep_params->wrk_data;
|
|
|
|
*eptr = ep_params;
|
|
|
|
/* initialize all wrk_stats state */
|
|
for (i = 0; i < NUM_NODES; i++) {
|
|
|
|
if (rte_lcore_is_enabled(i) == 0)
|
|
continue;
|
|
/*init the freqs table */
|
|
total_avail_freqs[i] = rte_power_freqs(i,
|
|
avail_freqs[i],
|
|
NUM_FREQS);
|
|
|
|
RTE_LOG(INFO, POWER, "total avail freq is %d , lcoreid %d\n",
|
|
total_avail_freqs[i],
|
|
i);
|
|
|
|
if (get_freq_index(LOW) > total_avail_freqs[i])
|
|
return -1;
|
|
|
|
if (rte_get_main_lcore() != i) {
|
|
w->wrk_stats[i].lcore_id = i;
|
|
set_policy(&w->wrk_stats[i], policy);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
rte_power_empty_poll_stat_free(void)
|
|
{
|
|
|
|
RTE_LOG(INFO, POWER, "Close the Empty Poll\n");
|
|
|
|
if (ep_params != NULL)
|
|
rte_free(ep_params);
|
|
}
|
|
|
|
int
|
|
rte_power_empty_poll_stat_update(unsigned int lcore_id)
|
|
{
|
|
struct priority_worker *poll_stats;
|
|
|
|
if (lcore_id >= NUM_NODES)
|
|
return -1;
|
|
|
|
poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
|
|
|
|
if (poll_stats->lcore_id == 0)
|
|
poll_stats->lcore_id = lcore_id;
|
|
|
|
poll_stats->empty_dequeues++;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
|
|
{
|
|
|
|
struct priority_worker *poll_stats;
|
|
|
|
if (lcore_id >= NUM_NODES)
|
|
return -1;
|
|
|
|
poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
|
|
|
|
if (poll_stats->lcore_id == 0)
|
|
poll_stats->lcore_id = lcore_id;
|
|
|
|
poll_stats->num_dequeue_pkts += nb_pkt;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
uint64_t
|
|
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
|
|
{
|
|
struct priority_worker *poll_stats;
|
|
|
|
if (lcore_id >= NUM_NODES)
|
|
return -1;
|
|
|
|
poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
|
|
|
|
if (poll_stats->lcore_id == 0)
|
|
poll_stats->lcore_id = lcore_id;
|
|
|
|
return poll_stats->empty_dequeues;
|
|
}
|
|
|
|
uint64_t
|
|
rte_power_poll_stat_fetch(unsigned int lcore_id)
|
|
{
|
|
struct priority_worker *poll_stats;
|
|
|
|
if (lcore_id >= NUM_NODES)
|
|
return -1;
|
|
|
|
poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
|
|
|
|
if (poll_stats->lcore_id == 0)
|
|
poll_stats->lcore_id = lcore_id;
|
|
|
|
return poll_stats->num_dequeue_pkts;
|
|
}
|