Anatoly Burakov f9d6cd8bfe timer: fix resource leak in finalize
Currently, whenever timer library is initialized, the memory
is leaked because there is no telling when primary or secondary
processes get to use the state, and there is no way to
initialize/deinitialize timer library state without race
conditions [1] because the data itself must live in shared memory.

Add a spinlock to the shared mem config to have a way to
exclusively initialize/deinitialize the timer library without
any races, and implement the synchronization mechanism based
on this lock in the timer library.

Also, update the API doc. Note that the behavior of the API
itself did not change - the requirement to call init in every
process was simply not documented explicitly.

[1] See the following email thread:
https://mails.dpdk.org/archives/dev/2019-May/131498.html

Fixes: c0749f7096c7 ("timer: allow management in shared memory")
Cc: stable@dpdk.org

Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
2019-07-06 10:32:40 +02:00

1103 lines
30 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>
#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_compat.h>
#include <rte_errno.h>
#include "rte_timer.h"
/**
* Per-lcore info for timers.
*
* One instance exists for every lcore slot of a struct rte_timer_data.
* The pending timers of an lcore are kept in a skiplist that starts at
* pending_head; pending_head.expire caches the expire value of the first
* pending timer (0 when the list is empty).
*/
struct priv_timer {
struct rte_timer pending_head; /**< dummy timer instance to head up list */
rte_spinlock_t list_lock; /**< lock to protect list access */
/** per-lcore flag: set to 1 when a timer that is currently running on
* this lcore gets reset or stopped; cleared to 0 by the manage loop
* right before each callback is invoked */
int updated;
/** track the current depth of the skiplist */
unsigned curr_skiplist_depth;
unsigned prev_lcore; /**< used for lcore round robin */
/** timer whose callback is being executed on this lcore right now */
struct rte_timer *running_tim;
#ifdef RTE_LIBRTE_TIMER_DEBUG
/** per-lcore statistics */
struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;
/* internal_flags bit: this timer data instance has been handed out */
#define FL_ALLOCATED (1 << 0)
/*
* One timer data instance: the per-lcore timer state for every possible
* lcore, plus bookkeeping flags (see FL_ALLOCATED).
*/
struct rte_timer_data {
struct priv_timer priv_timer[RTE_MAX_LCORE];
uint8_t internal_flags;
};
/* number of timer data instances held in the shared array */
#define RTE_MAX_DATA_ELS 64
/* memzone that backs rte_timer_data_arr and the reference counter */
static const struct rte_memzone *rte_timer_data_mz;
/* reference counter stored in the memzone right after the data array;
* it is modified with the mcfg timer lock held by init and finalize */
static int *volatile rte_timer_mz_refcnt;
/* array of RTE_MAX_DATA_ELS timer data instances inside the memzone */
static struct rte_timer_data *rte_timer_data_arr;
/* id of the instance used by the non-"alt" APIs; zero-initialized, i.e. the
* zeroth array entry */
static const uint32_t default_data_id;
/* non-zero once rte_timer_subsystem_init() succeeded in this process */
static uint32_t rte_timer_subsystem_initialized;
/* For maintaining older interfaces for a period */
static struct rte_timer_data default_timer_data;
/* when debug is enabled, store some statistics:
* add n to the statistic counter "name" of the calling lcore, provided the
* caller runs on a valid lcore id; expands to nothing when debug is off */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do { \
unsigned __lcore_id = rte_lcore_id(); \
if (__lcore_id < RTE_MAX_LCORE) \
priv_timer[__lcore_id].stats.name += (n); \
} while(0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif
/*
 * Tell whether the timer data instance with index id is allocated.
 *
 * The caller must make sure that id < RTE_MAX_DATA_ELS. Returns 0 when the
 * shared data array has not been set up yet (the library was never
 * initialized in this process) instead of dereferencing a NULL pointer.
 *
 * Returns 1 if the instance is allocated, 0 otherwise.
 */
static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr != NULL &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED) != 0;
}
/* validate ID and retrieve timer data pointer, or return error value
 *
 * Makes the *calling function* return retval when id is out of range or
 * refers to an instance that is not allocated; otherwise stores the address
 * of the instance in timer_data. The id argument is evaluated more than
 * once, so it must not have side effects.
 */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if ((id) >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	(timer_data) = &rte_timer_data_arr[(id)];			\
} while (0)
/*
 * Hand out the first timer data instance that is not yet allocated.
 *
 * On success the instance is flagged as allocated and, when id_ptr is not
 * NULL, its index is stored in *id_ptr.
 *
 * Returns 0 on success, -ENOMEM when the timer subsystem is not initialized
 * and -ENOSPC when every instance is already in use.
 */
int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int idx;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (idx = 0; idx < RTE_MAX_DATA_ELS; idx++) {
		struct rte_timer_data *slot = &rte_timer_data_arr[idx];

		if (slot->internal_flags & FL_ALLOCATED)
			continue;

		slot->internal_flags |= FL_ALLOCATED;
		if (id_ptr != NULL)
			*id_ptr = idx;
		return 0;
	}

	return -ENOSPC;
}
int
rte_timer_data_dealloc(uint32_t id)
{
struct rte_timer_data *timer_data;
TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);
timer_data->internal_flags &= ~(FL_ALLOCATED);
return 0;
}
/*
 * Legacy (2.0) initialization: prepares the per-lcore state of the static
 * default_timer_data instance. The instance is zeroed by virtue of being
 * static, so only the list lock and the round-robin start value are set.
 */
void
rte_timer_subsystem_init_v20(void)
{
	struct priv_timer *per_lcore = default_timer_data.priv_timer;
	unsigned int idx;

	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
		struct priv_timer *pt = &per_lcore[idx];

		rte_spinlock_init(&pt->list_lock);
		pt->prev_lcore = idx;
	}
}
VERSION_SYMBOL(rte_timer_subsystem_init, _v20, 2.0);
/* Init the timer library. Allocate an array of timer data structs in shared
* memory, and allocate the zeroth entry for use with original timer
* APIs. Since the intersection of the sets of lcore ids in primary and
* secondary processes should be empty, the zeroth entry can be shared by
* multiple processes.
*/
int
rte_timer_subsystem_init_v1905(void)
{
const struct rte_memzone *mz;
struct rte_timer_data *data;
int i, lcore_id;
static const char *mz_name = "rte_timer_mz";
const size_t data_arr_size =
RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
bool do_full_init = true;
if (rte_timer_subsystem_initialized)
return -EALREADY;
rte_mcfg_timer_lock();
mz = rte_memzone_lookup(mz_name);
if (mz == NULL) {
mz = rte_memzone_reserve_aligned(mz_name, mem_size,
SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
if (mz == NULL) {
rte_mcfg_timer_unlock();
return -ENOMEM;
}
do_full_init = true;
} else
do_full_init = false;
rte_timer_data_mz = mz;
rte_timer_data_arr = mz->addr;
rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
if (do_full_init) {
for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
data = &rte_timer_data_arr[i];
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
lcore_id++) {
rte_spinlock_init(
&data->priv_timer[lcore_id].list_lock);
data->priv_timer[lcore_id].prev_lcore =
lcore_id;
}
}
}
rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
(*rte_timer_mz_refcnt)++;
rte_mcfg_timer_unlock();
rte_timer_subsystem_initialized = 1;
return 0;
}
MAP_STATIC_SYMBOL(int rte_timer_subsystem_init(void),
rte_timer_subsystem_init_v1905);
BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
/*
 * Undo rte_timer_subsystem_init() for this process: drop one reference on
 * the shared memzone and free the memzone when the counter reaches zero.
 * Does nothing when the library is not initialized in this process.
 * The counter is updated with the mcfg timer lock held.
 */
void
rte_timer_subsystem_finalize(void)
{
	int refs_left;

	if (!rte_timer_subsystem_initialized)
		return;

	rte_mcfg_timer_lock();

	refs_left = --(*rte_timer_mz_refcnt);
	if (refs_left == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_mcfg_timer_unlock();

	rte_timer_subsystem_initialized = 0;
}
/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
union rte_timer_status status;
status.state = RTE_TIMER_STOP;
status.owner = RTE_TIMER_NO_OWNER;
tim->status.u32 = status.u32;
}
/*
* if timer is pending or stopped (or running on the same core than
* us), mark timer as configuring, and on success return the previous
* status of the timer
*/
static int
timer_set_config_state(struct rte_timer *tim,
union rte_timer_status *ret_prev_status,
struct priv_timer *priv_timer)
{
union rte_timer_status prev_status, status;
int success = 0;
unsigned lcore_id;
lcore_id = rte_lcore_id();
/* wait that the timer is in correct status before update,
* and mark it as being configured */
while (success == 0) {
prev_status.u32 = tim->status.u32;
/* timer is running on another core
* or ready to run on local core, exit
*/
if (prev_status.state == RTE_TIMER_RUNNING &&
(prev_status.owner != (uint16_t)lcore_id ||
tim != priv_timer[lcore_id].running_tim))
return -1;
/* timer is being configured on another core */
if (prev_status.state == RTE_TIMER_CONFIG)
return -1;
/* here, we know that timer is stopped or pending,
* mark it atomically as being configured */
status.state = RTE_TIMER_CONFIG;
status.owner = (int16_t)lcore_id;
success = rte_atomic32_cmpset(&tim->status.u32,
prev_status.u32,
status.u32);
}
ret_prev_status->u32 = prev_status.u32;
return 0;
}
/*
* if timer is pending, mark timer as running
*/
static int
timer_set_running_state(struct rte_timer *tim)
{
union rte_timer_status prev_status, status;
unsigned lcore_id = rte_lcore_id();
int success = 0;
/* wait that the timer is in correct status before update,
* and mark it as running */
while (success == 0) {
prev_status.u32 = tim->status.u32;
/* timer is not pending anymore */
if (prev_status.state != RTE_TIMER_PENDING)
return -1;
/* here, we know that timer is stopped or pending,
* mark it atomically as being configured */
status.state = RTE_TIMER_RUNNING;
status.owner = (int16_t)lcore_id;
success = rte_atomic32_cmpset(&tim->status.u32,
prev_status.u32,
status.u32);
}
return 0;
}
/*
 * Pick the skiplist level for a new entry.
 *
 * The level is drawn at random so that an entry present at level n also
 * appears at level n+1 with probability 1/4. The result never exceeds
 * curr_depth (at most one new level is opened per insertion) and is always
 * below MAX_SKIPLIST_DEPTH.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif
	/* The position of the lowest set bit of a (pseudo)random number,
	 * with bit 0 masked out, is halved to get the 1/4 probability:
	 * everything is at level 0, 1 in 4 at level 1, 1 in 16 at level 2...
	 */
	uint32_t bits = rte_rand() & (UINT32_MAX - 1);
	uint32_t level;

	if (bits == 0)
		level = MAX_SKIPLIST_DEPTH;
	else
		level = (rte_bsf32(bits) - 1) / 2;

	/* never skip a level: go at most one above what is in use, so we
	 * do not end up with e.g. levels 0 and 7 and nothing in between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH - 1;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
/*
 * For a given time value, find at every skiplist level of lcore tim_lcore
 * the last entry whose expire value is <= time_val.
 *
 * prev[] receives one pointer per level in use plus, at index
 * curr_skiplist_depth, the dummy list head. A level without a matching
 * entry gets the entry found at the level above (ultimately the head).
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	struct priv_timer *pt = &priv_timer[tim_lcore];
	struct rte_timer *cursor = &pt->pending_head;
	unsigned int lvl = pt->curr_skiplist_depth;

	prev[lvl] = cursor;

	/* descend level by level, continuing from where the level above
	 * stopped
	 */
	while (lvl-- != 0) {
		while (cursor->sl_next[lvl] != NULL &&
		       cursor->sl_next[lvl]->expire <= time_val)
			cursor = cursor->sl_next[lvl];
		prev[lvl] = cursor;
	}
}
/*
 * Given a timer node in the skiplist of lcore tim_lcore, find for every
 * level the entry that precedes it (or, at levels the node is not part of,
 * the last entry that does not expire after it).
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int lvl;

	/* start from the entries expiring strictly before the node ... */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);

	/* ... then step over entries with the same expire value until the
	 * node itself is the successor
	 */
	lvl = (int)priv_timer[tim_lcore].curr_skiplist_depth;
	while (--lvl >= 0) {
		struct rte_timer *cursor = prev[lvl];
		struct rte_timer *succ;

		for (succ = cursor->sl_next[lvl];
		     succ != NULL && succ != tim &&
		     succ->expire <= tim->expire;
		     succ = cursor->sl_next[lvl])
			cursor = succ;

		prev[lvl] = cursor;
	}
}
/* Insert a timer into the pending skiplist of lcore tim_lcore.
 *
 * call with lock held as necessary
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	struct priv_timer *dst = &priv_timer[tim_lcore];
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
	unsigned int lvl, tim_level;

	/* locate the insertion point at every level */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* draw the level of the new entry; open a new level if needed */
	tim_level = timer_get_skiplist_level(dst->curr_skiplist_depth);
	if (tim_level == dst->curr_skiplist_depth)
		dst->curr_skiplist_depth++;

	/* link the entry in at its level and at every level below */
	for (lvl = tim_level + 1; lvl-- > 0; ) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
	}

	/* cache the earliest expire value in the dummy head
	 * NOTE: this is not atomic on 32-bit
	 */
	dst->pending_head.expire = dst->pending_head.sl_next[0]->expire;
}
/*
 * Remove a timer from the pending list of the lcore that owned it
 * (prev_status.owner), taking that list's lock if needed.
 *
 * timer must be in config state
 * timer must be in a list
 *
 * The lock is taken unless the list belongs to the calling lcore and
 * local_is_locked says the caller already holds it.
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	const unsigned int self = rte_lcore_id();
	const unsigned int owner = prev_status.owner;
	struct priv_timer *owner_priv = &priv_timer[owner];
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
	const int take_lock = (owner != self) || !local_is_locked;
	int lvl;

	if (take_lock)
		rte_spinlock_lock(&owner_priv->list_lock);

	/* the first entry goes away: refresh the cached earliest expire
	 * value in the dummy head. NOTE: this is not atomic on 32-bit
	 */
	if (owner_priv->pending_head.sl_next[0] == tim) {
		if (tim->sl_next[0] == NULL)
			owner_priv->pending_head.expire = 0;
		else
			owner_priv->pending_head.expire =
				tim->sl_next[0]->expire;
	}

	/* make every predecessor point past the removed entry */
	timer_get_prev_entries_for_node(tim, owner, prev, priv_timer);
	for (lvl = (int)owner_priv->curr_skiplist_depth - 1; lvl >= 0; lvl--) {
		if (prev[lvl]->sl_next[lvl] == tim)
			prev[lvl]->sl_next[lvl] = tim->sl_next[lvl];
	}

	/* drop top levels that became empty */
	lvl = (int)owner_priv->curr_skiplist_depth - 1;
	while (lvl >= 0 && owner_priv->pending_head.sl_next[lvl] == NULL) {
		owner_priv->curr_skiplist_depth--;
		lvl--;
	}

	if (take_lock)
		rte_spinlock_unlock(&owner_priv->list_lock);
}
/* Reset and start the timer associated with the timer handle (private func)
 *
 * tim_lcore selects the lcore whose pending list receives the timer;
 * LCORE_ID_ANY picks one round-robin. local_is_locked tells that the caller
 * already holds the list lock of the calling lcore. expire is an absolute
 * value, period is 0 for a one-shot timer.
 *
 * Returns 0 on success, -1 when the timer cannot be moved into the CONFIG
 * state (see timer_set_config_state()).
 */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	struct priv_timer *priv_timer = timer_data->priv_timer;
	const unsigned int self = rte_lcore_id();
	union rte_timer_status old_status, new_status;
	int take_lock;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (self < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
					priv_timer[self].prev_lcore, 0, 1);
			priv_timer[self].prev_lcore = tim_lcore;
		} else {
			/* non-EAL thread do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore.
			 */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
		}
	}

	/* claim the timer for configuration; bail out if that fails */
	if (timer_set_config_state(tim, &old_status, priv_timer) < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);

	/* tell the manage loop that the running timer was touched */
	if (old_status.state == RTE_TIMER_RUNNING && self < RTE_MAX_LCORE)
		priv_timer[self].updated = 1;

	/* a pending timer sits in a list: take it out first */
	if (old_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, old_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* the destination list must be locked unless it is the local one
	 * and the caller (rte_timer_manage()) already holds its lock
	 */
	take_lock = (tim_lcore != self) || !local_is_locked;
	if (take_lock)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only us can modify
	 * the state so we don't need to use cmpset() here
	 */
	rte_wmb();
	new_status.state = RTE_TIMER_PENDING;
	new_status.owner = (int16_t)tim_lcore;
	tim->status.u32 = new_status.u32;

	if (take_lock)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}
/* Reset and start the timer associated with the timer handle tim
 * (legacy 2.0 symbol, operates on the static default_timer_data).
 *
 * ticks is relative to the current timer cycle count. Unless tim_lcore is
 * LCORE_ID_ANY it must be an enabled lcore or a service lcore.
 *
 * Returns 0 on success, -1 on an unusable tim_lcore or when the timer
 * cannot be reconfigured right now.
 */
int
rte_timer_reset_v20(struct rte_timer *tim, uint64_t ticks,
		    enum rte_timer_type type, unsigned int tim_lcore,
		    rte_timer_cb_t fct, void *arg)
{
	const uint64_t now = rte_get_timer_cycles();
	const uint64_t period = (type == PERIODICAL) ? ticks : 0;

	if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
		     !(rte_lcore_is_enabled(tim_lcore) ||
		       rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
		return -1;

	return __rte_timer_reset(tim, now + ticks, period, tim_lcore,
				 fct, arg, 0, &default_timer_data);
}
VERSION_SYMBOL(rte_timer_reset, _v20, 2.0);
/* Default (19.05) implementation of rte_timer_reset(): forwards to
* rte_timer_alt_reset() using the default timer data instance
* (default_data_id) and returns its result unchanged. */
int
rte_timer_reset_v1905(struct rte_timer *tim, uint64_t ticks,
enum rte_timer_type type, unsigned int tim_lcore,
rte_timer_cb_t fct, void *arg)
{
return rte_timer_alt_reset(default_data_id, tim, ticks, type,
tim_lcore, fct, arg);
}
MAP_STATIC_SYMBOL(int rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
enum rte_timer_type type,
unsigned int tim_lcore,
rte_timer_cb_t fct, void *arg),
rte_timer_reset_v1905);
BIND_DEFAULT_SYMBOL(rte_timer_reset, _v1905, 19.05);
/*
 * Reset and start a timer using the timer data instance timer_data_id.
 *
 * ticks is relative to the current timer cycle count; a PERIODICAL timer
 * uses ticks as its period as well.
 *
 * Returns -EINVAL when timer_data_id does not refer to an allocated
 * instance, otherwise the result of __rte_timer_reset() (0 or -1).
 */
int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	const uint64_t now = rte_get_timer_cycles();
	struct rte_timer_data *timer_data;
	uint64_t period;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	period = (type == PERIODICAL) ? ticks : 0;

	return __rte_timer_reset(tim, now + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}
/* Keep calling rte_timer_reset() until it succeeds, pausing the CPU
 * between attempts.
 */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	for (;;) {
		if (rte_timer_reset(tim, ticks, type, tim_lcore,
				    fct, arg) == 0)
			break;
		rte_pause();
	}
}
static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
struct rte_timer_data *timer_data)
{
union rte_timer_status prev_status, status;
unsigned lcore_id = rte_lcore_id();
int ret;
struct priv_timer *priv_timer = timer_data->priv_timer;
/* wait that the timer is in correct status before update,
* and mark it as being configured */
ret = timer_set_config_state(tim, &prev_status, priv_timer);
if (ret < 0)
return -1;
__TIMER_STAT_ADD(priv_timer, stop, 1);
if (prev_status.state == RTE_TIMER_RUNNING &&
lcore_id < RTE_MAX_LCORE) {
priv_timer[lcore_id].updated = 1;
}
/* remove it from list */
if (prev_status.state == RTE_TIMER_PENDING) {
timer_del(tim, prev_status, local_is_locked, priv_timer);
__TIMER_STAT_ADD(priv_timer, pending, -1);
}
/* mark timer as stopped */
rte_wmb();
status.state = RTE_TIMER_STOP;
status.owner = RTE_TIMER_NO_OWNER;
tim->status.u32 = status.u32;
return 0;
}
/* Stop the timer associated with the timer handle tim (legacy 2.0 symbol).
* Operates on the static default_timer_data and returns the result of
* __rte_timer_stop(): 0 on success, -1 if the timer cannot be stopped now. */
int
rte_timer_stop_v20(struct rte_timer *tim)
{
return __rte_timer_stop(tim, 0, &default_timer_data);
}
VERSION_SYMBOL(rte_timer_stop, _v20, 2.0);
/* Default (19.05) implementation of rte_timer_stop(): forwards to
* rte_timer_alt_stop() using the default timer data instance
* (default_data_id) and returns its result unchanged. */
int
rte_timer_stop_v1905(struct rte_timer *tim)
{
return rte_timer_alt_stop(default_data_id, tim);
}
MAP_STATIC_SYMBOL(int rte_timer_stop(struct rte_timer *tim),
rte_timer_stop_v1905);
BIND_DEFAULT_SYMBOL(rte_timer_stop, _v1905, 19.05);
int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
struct rte_timer_data *timer_data;
TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
return __rte_timer_stop(tim, 0, timer_data);
}
/* Keep calling rte_timer_stop() until it succeeds, pausing the CPU
 * between attempts.
 */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	for (;;) {
		if (rte_timer_stop(tim) == 0)
			break;
		rte_pause();
	}
}
/* Test the PENDING status of the timer handle tim.
 * Returns 1 when the timer is pending, 0 otherwise.
 */
int
rte_timer_pending(struct rte_timer *tim)
{
	if (tim->status.state == RTE_TIMER_PENDING)
		return 1;

	return 0;
}
/* must be called periodically, run all timer that expired
*
* Works on the pending list of the calling lcore inside timer_data:
*  1. with the list lock held, cut every timer whose expire value is
*     <= the current cycle count out of the skiplist and move it from
*     PENDING to RUNNING;
*  2. with the lock released, invoke the callback of each of those timers;
*  3. afterwards stop one-shot timers and re-arm periodic ones, unless the
*     callback itself already reset or stopped the timer.
*/
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
union rte_timer_status status;
struct rte_timer *tim, *next_tim;
struct rte_timer *run_first_tim, **pprev;
unsigned lcore_id = rte_lcore_id();
struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
uint64_t cur_time;
int i, ret;
struct priv_timer *priv_timer = timer_data->priv_timer;
/* timer manager only runs on EAL thread with valid lcore_id */
assert(lcore_id < RTE_MAX_LCORE);
__TIMER_STAT_ADD(priv_timer, manage, 1);
/* optimize for the case where per-cpu list is empty */
if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
return;
cur_time = rte_get_timer_cycles();
#ifdef RTE_ARCH_64
/* on 64-bit the value cached in the pending_head.expired will be
* updated atomically, so we can consult that for a quick check here
* outside the lock */
if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
return;
#endif
/* walk the ordered list and collect the expired timers in a run list */
rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
/* if nothing to do just unlock and return */
if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
return;
}
/* save start of list of expired timers */
tim = priv_timer[lcore_id].pending_head.sl_next[0];
/* break the existing list at current time point: at every level the
* head is made to point at the first entry that has not expired and the
* expired part is terminated with NULL; levels left empty are dropped */
timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
if (prev[i] == &priv_timer[lcore_id].pending_head)
continue;
priv_timer[lcore_id].pending_head.sl_next[i] =
prev[i]->sl_next[i];
if (prev[i]->sl_next[i] == NULL)
priv_timer[lcore_id].curr_skiplist_depth--;
prev[i] ->sl_next[i] = NULL;
}
/* transition run-list from PENDING to RUNNING; pprev always points at
* the link that leads to the current timer so it can be unlinked */
run_first_tim = tim;
pprev = &run_first_tim;
for ( ; tim != NULL; tim = next_tim) {
next_tim = tim->sl_next[0];
ret = timer_set_running_state(tim);
if (likely(ret == 0)) {
pprev = &tim->sl_next[0];
} else {
/* another core is trying to re-config this one,
* remove it from local expired list
*/
*pprev = next_tim;
}
}
/* update the next to expire timer value */
priv_timer[lcore_id].pending_head.expire =
(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
priv_timer[lcore_id].pending_head.sl_next[0]->expire;
rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
/* now scan expired list and call callbacks */
for (tim = run_first_tim; tim != NULL; tim = next_tim) {
next_tim = tim->sl_next[0];
/* "updated" is raised by reset/stop when they act on the running
* timer; clear it so the check after the callback is meaningful */
priv_timer[lcore_id].updated = 0;
priv_timer[lcore_id].running_tim = tim;
/* execute callback function with list unlocked */
tim->f(tim, tim->arg);
__TIMER_STAT_ADD(priv_timer, pending, -1);
/* the timer was stopped or reloaded by the callback
* function, we have nothing to do here */
if (priv_timer[lcore_id].updated == 1)
continue;
if (tim->period == 0) {
/* remove from done list and mark timer as stopped */
status.state = RTE_TIMER_STOP;
status.owner = RTE_TIMER_NO_OWNER;
rte_wmb();
tim->status.u32 = status.u32;
}
else {
/* periodic timer: mark it as pending and re-arm it one period
* after its previous expire value; the local list lock is held
* here, hence local_is_locked == 1 in the reset call */
rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
status.state = RTE_TIMER_PENDING;
__TIMER_STAT_ADD(priv_timer, pending, 1);
status.owner = (int16_t)lcore_id;
rte_wmb();
tim->status.u32 = status.u32;
__rte_timer_reset(tim, tim->expire + tim->period,
tim->period, lcore_id, tim->f, tim->arg, 1,
timer_data);
rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
}
}
/* no callback is executing on this lcore any more */
priv_timer[lcore_id].running_tim = NULL;
}
/* Legacy (2.0) rte_timer_manage(): runs the expired timers of the calling
* lcore that are held in the static default_timer_data. */
void
rte_timer_manage_v20(void)
{
__rte_timer_manage(&default_timer_data);
}
VERSION_SYMBOL(rte_timer_manage, _v20, 2.0);
/* Default (19.05) rte_timer_manage(): runs the expired timers of the
* calling lcore that are held in the default timer data instance.
* Returns 0 on success, -EINVAL when that instance is not allocated. */
int
rte_timer_manage_v1905(void)
{
struct rte_timer_data *timer_data;
TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);
__rte_timer_manage(timer_data);
return 0;
}
MAP_STATIC_SYMBOL(int rte_timer_manage(void), rte_timer_manage_v1905);
BIND_DEFAULT_SYMBOL(rte_timer_manage, _v1905, 19.05);
/*
 * Run the expired timers found in the pending lists of a set of lcores.
 *
 * timer_data_id  timer data instance to work on
 * poll_lcores    lcore ids whose pending lists are inspected; NULL means
 *                "only the calling lcore"
 * nb_poll_lcores number of entries in poll_lcores (ignored when NULL)
 * f              function invoked for every expired timer instead of the
 *                timer's own callback
 *
 * For every polled lcore the expired timers are cut out of the skiplist and
 * moved from PENDING to RUNNING with that list's lock held. The resulting
 * run lists are then merged: the timer with the smallest expire value is
 * always processed next. After f() returns, one-shot timers are stopped and
 * periodic ones are re-armed on the calling lcore, unless f() itself reset
 * or stopped the timer.
 *
 * The per-lcore running_tim pointer is cleared after every processed timer,
 * including the case where f() updated the timer, so it never keeps
 * referring to a timer that is no longer running once this function returns.
 *
 * Returns 0 on success, -EINVAL when timer_data_id does not refer to an
 * allocated instance.
 */
int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in the pending_head.expired will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* walk the ordered list and collect the expired timers */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and go to the next lcore */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this one,
				 * remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
			privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here except forgetting
		 * about it as the running timer
		 */
		if (data->priv_timer[this_lcore].updated == 1) {
			data->priv_timer[this_lcore].running_tim = NULL;
			continue;
		}

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		} else {
			/* keep it in list and mark timer as pending; the
			 * local list lock is held, hence local_is_locked == 1
			 */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}
/* Walk pending lists, stopping timers and calling user-specified function
 *
 * timer_data_id  timer data instance to work on
 * walk_lcores    lcore ids whose pending lists are walked
 * nb_walk_lcores number of entries in walk_lcores
 * f              optional function called for every timer after the attempt
 *                to stop it (may be NULL)
 * f_arg          opaque argument handed to f
 *
 * The pending list lock is NOT held across the walk: removing a timer
 * (timer_del(), reached through __rte_timer_stop()) takes the lock of the
 * list that owns the timer on its own. Holding the same lock here and
 * passing local_is_locked == 1 made timer_del() acquire the non-recursive
 * spinlock a second time whenever the walked lcore was not the calling
 * lcore, which never returns.
 *
 * Returns 0 on success, -EINVAL when timer_data_id does not refer to an
 * allocated instance.
 */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			/* fetch the successor first: stopping unlinks tim */
			next_tim = tim->sl_next[0];

			/* timer_del() locks the owning list by itself */
			__rte_timer_stop(tim, 0, timer_data);

			if (f)
				f(tim, f_arg);
		}
	}

	return 0;
}
/* Dump statistics about timers to the stream f.
 *
 * With RTE_LIBRTE_TIMER_DEBUG the per-lcore counters of timer_data are
 * summed up and printed; otherwise a notice is printed that statistics are
 * disabled (timer_data is unused in that case).
 */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats total;
	const struct priv_timer *pt = timer_data->priv_timer;
	const struct priv_timer *const end = pt + RTE_MAX_LCORE;

	memset(&total, 0, sizeof(total));
	for (; pt != end; pt++) {
		total.reset += pt->stats.reset;
		total.stop += pt->stats.stop;
		total.manage += pt->stats.manage;
		total.pending += pt->stats.pending;
	}

	fprintf(f, "Timer statistics:\n");
	fprintf(f, " reset = %"PRIu64"\n", total.reset);
	fprintf(f, " stop = %"PRIu64"\n", total.stop);
	fprintf(f, " manage = %"PRIu64"\n", total.manage);
	fprintf(f, " pending = %"PRIu64"\n", total.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}
/* Legacy (2.0) rte_timer_dump_stats(): dumps the statistics of the static
* default_timer_data to the stream f. */
void
rte_timer_dump_stats_v20(FILE *f)
{
__rte_timer_dump_stats(&default_timer_data, f);
}
VERSION_SYMBOL(rte_timer_dump_stats, _v20, 2.0);
/* Default (19.05) rte_timer_dump_stats(): forwards to
* rte_timer_alt_dump_stats() using the default timer data instance
* (default_data_id) and returns its result unchanged. */
int
rte_timer_dump_stats_v1905(FILE *f)
{
return rte_timer_alt_dump_stats(default_data_id, f);
}
MAP_STATIC_SYMBOL(int rte_timer_dump_stats(FILE *f),
rte_timer_dump_stats_v1905);
BIND_DEFAULT_SYMBOL(rte_timer_dump_stats, _v1905, 19.05);
/*
 * Dump the statistics of the timer data instance timer_data_id to the
 * stream f.
 *
 * Returns 0 on success, -EINVAL when timer_data_id does not refer to an
 * allocated instance.
 */
int
rte_timer_alt_dump_stats(uint32_t timer_data_id, FILE *f)
{
	struct rte_timer_data *timer_data;

	/* timer_data_id is always consumed by the validation below, so it
	 * must not be annotated as unused
	 */
	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}