numam-spdk/lib/event/reactor.c

499 lines
12 KiB
C
Raw Normal View History

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/likely.h"
#include "spdk_internal/event.h"
#include "spdk_internal/log.h"
#include "spdk_internal/thread.h"
#include "spdk/log.h"
#include "spdk/thread.h"
#include "spdk/env.h"
#include "spdk/util.h"
/* Maximum number of events dequeued from a reactor's event ring per batch. */
#define SPDK_EVENT_BATCH_SIZE 8
/*
 * Process-wide reactor lifecycle state, stored in g_reactor_state.
 * Reactor loops keep running only while the state is RUNNING.
 */
enum spdk_reactor_state {
/* Initial value of g_reactor_state, before spdk_reactors_init() succeeds. */
SPDK_REACTOR_STATE_INVALID = 0,
/* Set at the end of a successful spdk_reactors_init(). */
SPDK_REACTOR_STATE_INITIALIZED = 1,
/* Set by spdk_reactors_start() before the reactor threads are launched. */
SPDK_REACTOR_STATE_RUNNING = 2,
/* Set by spdk_reactors_stop(); causes each reactor loop to break out. */
SPDK_REACTOR_STATE_EXITING = 3,
/* Set by spdk_reactors_start() after all reactor threads have been waited for. */
SPDK_REACTOR_STATE_SHUTDOWN = 4,
};
/*
 * Per-core reactor state. One instance exists per slot of the g_reactors
 * array, which is indexed by logical core number.
 */
struct spdk_reactor {
/* Logical core number for this reactor. */
uint32_t lcore;
/* Poller that samples rusage for the reactor; NULL while the context switch monitor is stopped. */
struct spdk_poller *rusage_poller;
/* Reactor tsc stats */
struct spdk_reactor_tsc_stats tsc_stats;
/* Tick value up to which elapsed time has already been added to tsc_stats. */
uint64_t tsc_last;
/* The last known rusage values */
struct rusage rusage;
/* Ring of pending struct spdk_event pointers: filled by spdk_event_call(), drained by the reactor loop. */
struct spdk_ring *events;
/* Longest time, in microseconds, the reactor sleeps when it had no work; 0 disables sleeping. */
uint64_t max_delay_us;
} __attribute__((aligned(64)));
/* Array of reactors indexed by logical core; allocated by spdk_reactors_init(), released by spdk_reactors_fini(). */
static struct spdk_reactor *g_reactors;
/* Current lifecycle state; reactor loops exit once it is no longer SPDK_REACTOR_STATE_RUNNING. */
static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_INVALID;
/* Whether each reactor should run the rusage-based context switch monitor. */
static bool g_context_switch_monitor_enabled = true;
/* Forward declaration; the definition appears later in this file. */
static void spdk_reactor_construct(struct spdk_reactor *w, uint32_t lcore,
uint64_t max_delay_us);
/* Pool that struct spdk_event objects are taken from and returned to. */
static struct spdk_mempool *g_spdk_event_mempool = NULL;
/* Set of cores that have a reactor; only allocated while spdk_reactors_start() is executing. */
static struct spdk_cpuset *g_spdk_app_core_mask;
static struct spdk_reactor *
spdk_reactor_get(uint32_t lcore)
{
struct spdk_reactor *reactor;
reactor = spdk_likely(g_reactors) ? &g_reactors[lcore] : NULL;
return reactor;
}
/*
 * Allocate an event that will invoke fn(arg1, arg2) on the reactor for lcore.
 *
 * The event is taken from g_spdk_event_mempool. Returns NULL (after an
 * assert in debug builds) when no reactor is available for lcore or the
 * pool has no free event. The event is not queued here; see spdk_event_call().
 */
struct spdk_event *
spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
{
	struct spdk_event *ev;

	if (spdk_reactor_get(lcore) == NULL) {
		assert(false);
		return NULL;
	}

	ev = spdk_mempool_get(g_spdk_event_mempool);
	if (!ev) {
		assert(false);
		return NULL;
	}

	ev->lcore = lcore;
	ev->fn = fn;
	ev->arg1 = arg1;
	ev->arg2 = arg2;

	return ev;
}
/*
 * Queue an event on the event ring of the reactor named by event->lcore.
 *
 * A NULL event, a missing reactor, or a reactor without an event ring is
 * treated as a programming error: it asserts in debug builds and is ignored
 * in release builds instead of dereferencing a NULL pointer. A failed
 * enqueue likewise asserts in debug builds.
 */
void
spdk_event_call(struct spdk_event *event)
{
	struct spdk_reactor *reactor;
	int rc;

	if (event == NULL) {
		assert(false);
		return;
	}

	/* spdk_reactor_get() returns NULL when the reactor array is not allocated. */
	reactor = spdk_reactor_get(event->lcore);
	if (reactor == NULL || reactor->events == NULL) {
		assert(false);
		return;
	}

	rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1);
	if (rc != 1) {
		assert(false);
	}
}
/*
 * Dequeue up to SPDK_EVENT_BATCH_SIZE events from the reactor's ring and run
 * each event's callback with `thread` set as the current SPDK thread.
 *
 * The processed events are returned to g_spdk_event_mempool in one bulk
 * operation. Returns the number of events that were run (0 if the ring was
 * empty, in which case the current thread is left untouched).
 */
static inline uint32_t
_spdk_event_queue_run_batch(struct spdk_reactor *reactor, struct spdk_thread *thread)
{
	void *batch[SPDK_EVENT_BATCH_SIZE];
	unsigned nevents;
	unsigned idx;

#ifdef DEBUG
	/*
	 * spdk_ring_dequeue() fills the array and reports how many entries it wrote,
	 * so uninitialized slots are never read. Zero the array anyway to be safe
	 * and to silence a static analyzer false positive.
	 */
	memset(batch, 0, sizeof(batch));
#endif

	nevents = spdk_ring_dequeue(reactor->events, batch, SPDK_EVENT_BATCH_SIZE);
	if (nevents == 0) {
		return 0;
	}

	spdk_set_thread(thread);

	for (idx = 0; idx != nevents; idx++) {
		struct spdk_event *ev = batch[idx];

		assert(ev != NULL);
		ev->fn(ev->arg1, ev->arg2);
	}

	spdk_set_thread(NULL);

	/* Hand every processed event back to the pool at once. */
	spdk_mempool_put_bulk(g_spdk_event_mempool, batch, nevents);

	return nevents;
}
/*
 * Poller callback for the context switch monitor.
 *
 * Reads the calling thread's resource usage and, when either context switch
 * counter differs from the previously stored sample, logs the difference.
 * The new sample is then stored in reactor->rusage.
 *
 * arg is the struct spdk_reactor the poller was registered for.
 * Always returns -1, both when getrusage() fails and after a successful sample.
 */
static int
get_rusage(void *arg)
{
	struct spdk_reactor *reactor = arg;
	struct rusage rusage;

	if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
		return -1;
	}

	if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
		/* lcore is a uint32_t, so it is printed with %u like the other log messages in this file. */
		SPDK_INFOLOG(SPDK_LOG_REACTOR,
			     "Reactor %u: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
			     reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
			     rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
	}

	reactor->rusage = rusage;

	return -1;
}
/*
 * Start the context switch monitor on a reactor.
 *
 * arg1 is the struct spdk_reactor; arg2 is unused. Does nothing when the
 * rusage poller is already registered. Otherwise takes a baseline rusage
 * sample and registers get_rusage() as a poller with a period of 1000000.
 */
static void
_spdk_reactor_context_switch_monitor_start(void *arg1, void *arg2)
{
	struct spdk_reactor *r = arg1;

	if (r->rusage_poller != NULL) {
		return;
	}

	getrusage(RUSAGE_THREAD, &r->rusage);
	r->rusage_poller = spdk_poller_register(get_rusage, r, 1000000);
}
/*
 * Stop the context switch monitor on a reactor.
 *
 * arg1 is the struct spdk_reactor; arg2 is unused. Unregisters the rusage
 * poller if one is currently registered, otherwise does nothing.
 */
static void
_spdk_reactor_context_switch_monitor_stop(void *arg1, void *arg2)
{
	struct spdk_reactor *r = arg1;

	if (r->rusage_poller == NULL) {
		return;
	}

	spdk_poller_unregister(&r->rusage_poller);
}
/*
 * Enable or disable the context switch monitor on every reactor.
 *
 * When the requested setting differs from the current one, the global flag
 * is updated and a start or stop event is sent to the reactor of each core.
 * Calling this with the current setting is a no-op.
 */
void
spdk_reactor_enable_context_switch_monitor(bool enable)
{
	struct spdk_reactor *reactor;
	struct spdk_event *event;
	spdk_event_fn fn;
	uint32_t core;

	if (enable == g_context_switch_monitor_enabled) {
		return;
	}

	g_context_switch_monitor_enabled = enable;
	fn = enable ? _spdk_reactor_context_switch_monitor_start :
	     _spdk_reactor_context_switch_monitor_stop;

	SPDK_ENV_FOREACH_CORE(core) {
		reactor = spdk_reactor_get(core);
		event = spdk_event_allocate(core, fn, reactor, NULL);
		/*
		 * spdk_event_allocate() returns NULL on failure; skip that core
		 * rather than passing a NULL event on to spdk_event_call().
		 */
		if (event != NULL) {
			spdk_event_call(event);
		}
	}
}
/*
 * Report the current context switch monitor setting (the global flag, not
 * the state of any individual reactor's poller).
 */
bool
spdk_reactor_context_switch_monitor_enabled(void)
{
	const bool enabled = g_context_switch_monitor_enabled;

	return enabled;
}
/*
 * Attribute the ticks elapsed since reactor->tsc_last to one of the
 * reactor's tsc counters, then advance tsc_last to `now`.
 *
 * arg is the struct spdk_reactor. rc selects the counter: positive -> busy,
 * zero -> idle, negative -> unknown.
 */
static void
spdk_reactor_add_tsc_stats(void *arg, int rc, uint64_t now)
{
	struct spdk_reactor *r = arg;
	struct spdk_reactor_tsc_stats *stats = &r->tsc_stats;

	if (rc > 0) {
		/* Work was done during this interval. */
		stats->busy_tsc += now - r->tsc_last;
	} else if (rc == 0) {
		/* Nothing to do during this interval. */
		stats->idle_tsc += now - r->tsc_last;
	} else {
		/* Status could not be determined. */
		stats->unknown_tsc += now - r->tsc_last;
	}

	r->tsc_last = now;
}
/*
 * Copy the tsc statistics of the reactor on `core` into *tsc_stats.
 *
 * Returns 0 on success. Returns -1 when `core` is not set in the application
 * core mask, or when no reactor can be obtained for it (logged as an error).
 */
int
spdk_reactor_get_tsc_stats(struct spdk_reactor_tsc_stats *tsc_stats, uint32_t core)
{
	struct spdk_reactor *r;

	if (spdk_cpuset_get_cpu(g_spdk_app_core_mask, core)) {
		r = spdk_reactor_get(core);
		if (r != NULL) {
			*tsc_stats = r->tsc_stats;
			return 0;
		}

		SPDK_ERRLOG("Unable to get reactor for core %u\n", core);
	}

	return -1;
}
/*
 * Main loop of a reactor; arg is the struct spdk_reactor to run.
 *
 * Allocates an SPDK thread named "reactor_<lcore>", optionally starts the
 * context switch monitor, and then repeatedly:
 *   1. runs one batch of queued events,
 *   2. polls the thread,
 *   3. if sleeping is enabled (max_delay_us != 0) and neither step did any
 *      work, sleeps for at most max_delay_us, shortened so that it does not
 *      run past the next poller expiration.
 * The loop ends once g_reactor_state is no longer SPDK_REACTOR_STATE_RUNNING;
 * the state is checked at the end of each iteration, so the body always runs
 * at least once.
 *
 * Returns 0 on normal exit, -1 if the thread could not be allocated.
 */
static int
_spdk_reactor_run(void *arg)
{
	struct spdk_reactor *reactor = arg;
	struct spdk_thread *thread;
	char name[32];
	uint64_t sleep_cycles;
	int poll_rc;

	snprintf(name, sizeof(name), "reactor_%u", reactor->lcore);
	thread = spdk_allocate_thread(name);
	if (thread == NULL) {
		return -1;
	}

	SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);

	/* Maximum sleep expressed in ticks; 0 means the reactor never sleeps. */
	sleep_cycles = reactor->max_delay_us * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;

	if (g_context_switch_monitor_enabled) {
		/* The monitor is started with this reactor's thread set as current. */
		spdk_set_thread(thread);
		_spdk_reactor_context_switch_monitor_start(reactor, NULL);
		spdk_set_thread(NULL);
	}

	reactor->tsc_last = spdk_get_ticks();

	do {
		bool idle = true;

		if (_spdk_event_queue_run_batch(reactor, thread) > 0) {
			/* Events were processed: account the interval as busy. */
			spdk_reactor_add_tsc_stats(reactor, 1, spdk_get_ticks());
			idle = false;
		}

		poll_rc = spdk_thread_poll(thread, 0);
		if (poll_rc != 0) {
			spdk_reactor_add_tsc_stats(reactor, poll_rc, spdk_get_ticks());
			idle = false;
		}

		/* Determine if the thread can sleep */
		if (sleep_cycles != 0 && idle) {
			uint64_t now = spdk_get_ticks();
			uint64_t next_expiry = spdk_thread_next_poller_expiration(thread);
			uint32_t sleep_us = reactor->max_delay_us;

			/*
			 * There are timers registered, so don't sleep beyond
			 * when the next timer should fire.
			 */
			if (next_expiry > 0 && next_expiry < (now + sleep_cycles)) {
				if (next_expiry > now) {
					sleep_us = ((next_expiry - now) *
						    SPDK_SEC_TO_USEC) / spdk_get_ticks_hz();
				} else {
					sleep_us = 0;
				}
			}

			if (sleep_us > 0) {
				usleep(sleep_us);
			}
		}
	} while (g_reactor_state == SPDK_REACTOR_STATE_RUNNING);

	/* Tear down with this reactor's thread set as current. */
	spdk_set_thread(thread);
	_spdk_reactor_context_switch_monitor_stop(reactor, NULL);
	spdk_free_thread(thread);

	return 0;
}
/*
 * Initialize one reactor slot: record its core and maximum sleep delay and
 * create its event ring (SPDK_RING_TYPE_MP_SC, created with a size argument
 * of 65536 and SPDK_ENV_SOCKET_ID_ANY).
 *
 * Ring creation failure is only caught by an assert.
 */
static void
spdk_reactor_construct(struct spdk_reactor *reactor, uint32_t lcore, uint64_t max_delay_us)
{
	struct spdk_ring *ring;

	ring = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_SOCKET_ID_ANY);

	reactor->lcore = lcore;
	reactor->max_delay_us = max_delay_us;
	reactor->events = ring;

	assert(reactor->events != NULL);
}
/*
 * Parse a core mask string into cpumask and restrict the result to the
 * cores in the application core mask.
 *
 * Returns the negative value from spdk_cpuset_parse() if parsing fails,
 * otherwise 0.
 */
int
spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc = spdk_cpuset_parse(cpumask, mask);

	if (rc < 0) {
		return rc;
	}

	/* Keep only the cores that are also in the application core mask. */
	spdk_cpuset_and(cpumask, spdk_app_get_core_mask());

	return 0;
}
/*
 * Return the application core mask. The pointer is NULL outside of
 * spdk_reactors_start(), which allocates the mask on entry and frees it
 * before returning.
 */
struct spdk_cpuset *
spdk_app_get_core_mask(void)
{
	struct spdk_cpuset *app_mask = g_spdk_app_core_mask;

	return app_mask;
}
/*
 * Start all reactors and run the master reactor on the calling core.
 *
 * Allocates the application core mask, launches a pinned reactor thread on
 * every core other than the current one, records each core in the mask, and
 * then runs the current core's reactor inline. Once that reactor returns,
 * waits for all launched threads, marks the state as SHUTDOWN and frees the
 * core mask.
 *
 * If the core mask cannot be allocated, an error is logged and the function
 * returns without changing the reactor state or starting any reactor.
 */
void
spdk_reactors_start(void)
{
	struct spdk_reactor *reactor;
	uint32_t i, current_core;
	int rc;

	/* Allocate before launching anything so a failure leaves nothing running. */
	g_spdk_app_core_mask = spdk_cpuset_alloc();
	if (g_spdk_app_core_mask == NULL) {
		SPDK_ERRLOG("Unable to allocate the application core mask\n");
		assert(false);
		return;
	}

	g_reactor_state = SPDK_REACTOR_STATE_RUNNING;

	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			reactor = spdk_reactor_get(i);
			rc = spdk_env_thread_launch_pinned(reactor->lcore, _spdk_reactor_run, reactor);
			if (rc < 0) {
				/*
				 * NOTE(review): on this path reactors launched earlier in the
				 * loop are left running and the core mask stays allocated.
				 */
				SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
				assert(false);
				return;
			}
		}
		spdk_cpuset_set_cpu(g_spdk_app_core_mask, i, true);
	}

	/* Start the master reactor */
	reactor = spdk_reactor_get(current_core);
	_spdk_reactor_run(reactor);

	spdk_env_thread_wait_all();

	g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;

	spdk_cpuset_free(g_spdk_app_core_mask);
	g_spdk_app_core_mask = NULL;
}
/*
 * Request that all reactors stop by moving the global state to EXITING.
 * Each reactor loop leaves once it observes a state other than RUNNING.
 * Both arguments are unused.
 */
void
spdk_reactors_stop(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	g_reactor_state = SPDK_REACTOR_STATE_EXITING;
}
/*
 * Create the event mempool and the per-core reactor array.
 *
 * max_delay_us is stored in every reactor as its maximum idle sleep time.
 * The reactor array has one 64-byte-aligned, zero-initialized slot for every
 * core index up to and including the last core; a reactor is constructed in
 * the slot of each core reported by SPDK_ENV_FOREACH_CORE.
 *
 * Returns 0 on success and sets the state to INITIALIZED. Returns -1 if the
 * mempool or the reactor array cannot be allocated; in that case no global
 * pointer is left referring to freed memory.
 */
int
spdk_reactors_init(unsigned int max_delay_us)
{
	int rc;
	uint32_t i, last_core;
	struct spdk_reactor *reactor;
	char mempool_name[32];
	void *reactors_mem = NULL;
	size_t reactors_size;

	snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
	g_spdk_event_mempool = spdk_mempool_create(mempool_name,
			       262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
			       sizeof(struct spdk_event),
			       SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
			       SPDK_ENV_SOCKET_ID_ANY);

	if (g_spdk_event_mempool == NULL) {
		SPDK_ERRLOG("spdk_event_mempool creation failed\n");
		return -1;
	}

	/* struct spdk_reactor must be aligned on 64 byte boundary */
	last_core = spdk_env_get_last_core();
	/* Compute the size in size_t so the multiplication is not done in 32 bits. */
	reactors_size = ((size_t)last_core + 1) * sizeof(struct spdk_reactor);

	/*
	 * Allocate through a real void * instead of casting &g_reactors to
	 * void **, which accesses the pointer through an incompatible type.
	 */
	rc = posix_memalign(&reactors_mem, 64, reactors_size);
	if (rc != 0) {
		SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
			    last_core + 1);
		spdk_mempool_free(g_spdk_event_mempool);
		/* Do not leave a dangling pool pointer behind for later calls. */
		g_spdk_event_mempool = NULL;
		g_reactors = NULL;
		return -1;
	}

	memset(reactors_mem, 0, reactors_size);
	g_reactors = reactors_mem;

	SPDK_ENV_FOREACH_CORE(i) {
		reactor = spdk_reactor_get(i);
		spdk_reactor_construct(reactor, i, max_delay_us);
	}

	g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;

	return 0;
}
void
spdk_reactors_fini(void)
{
2017-02-24 16:27:42 -07:00
uint32_t i;
struct spdk_reactor *reactor;
SPDK_ENV_FOREACH_CORE(i) {
reactor = spdk_reactor_get(i);
if (spdk_likely(reactor != NULL) && reactor->events != NULL) {
spdk_ring_free(reactor->events);
2017-02-24 16:27:42 -07:00
}
}
spdk_mempool_free(g_spdk_event_mempool);
free(g_reactors);
g_reactors = NULL;
}
/* Registers the "reactor" log component with SPDK_LOG_REACTOR, the flag this file passes to SPDK_INFOLOG(). */
SPDK_LOG_REGISTER_COMPONENT("reactor", SPDK_LOG_REACTOR)