numam-spdk/lib/event/reactor.c
Daniel Verkamp 2b0b705fb1 reactor: make sure socket_count isn't zero
Avoid division by zero in the event mempool cache size calculation.

Change-Id: Ic117ef2dc3a798fb0a57572f1178233e83e73849
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
2017-03-24 10:52:28 -07:00

824 lines
20 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk_internal/event.h"
#include <assert.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#ifdef __linux__
#include <sys/prctl.h>
#endif
#ifdef __FreeBSD__
#include <pthread_np.h>
#endif
#include <rte_config.h>
#include <rte_ring.h>
#include "spdk/log.h"
#include "spdk/io_channel.h"
#include "spdk/env.h"
#define SPDK_MAX_SOCKET 64
#define SPDK_REACTOR_SPIN_TIME_US 1000
#define SPDK_TIMER_POLL_ITERATIONS 5
#define SPDK_EVENT_BATCH_SIZE 8
enum spdk_poller_state {
/* The poller is registered with a reactor but not currently executing its fn. */
SPDK_POLLER_STATE_WAITING,
/* The poller is currently running its fn. */
SPDK_POLLER_STATE_RUNNING,
/* The poller was unregistered during the execution of its fn. */
SPDK_POLLER_STATE_UNREGISTERED,
};
struct spdk_poller {
TAILQ_ENTRY(spdk_poller) tailq;
uint32_t lcore;
/* Current state of the poller; should only be accessed from the poller's thread. */
enum spdk_poller_state state;
uint64_t period_ticks;
uint64_t next_run_tick;
spdk_poller_fn fn;
void *arg;
struct spdk_event *unregister_complete_event;
};
enum spdk_reactor_state {
SPDK_REACTOR_STATE_INVALID = 0,
SPDK_REACTOR_STATE_INITIALIZED = 1,
SPDK_REACTOR_STATE_RUNNING = 2,
SPDK_REACTOR_STATE_EXITING = 3,
SPDK_REACTOR_STATE_SHUTDOWN = 4,
};
struct spdk_reactor {
/* Logical core number for this reactor. */
uint32_t lcore;
/* Socket ID for this reactor. */
uint32_t socket_id;
/*
* Contains pollers actively running on this reactor. Pollers
* are run round-robin. The reactor takes one poller from the head
* of the ring, executes it, then puts it back at the tail of
* the ring.
*/
TAILQ_HEAD(, spdk_poller) active_pollers;
/**
* Contains pollers running on this reactor with a periodic timer.
*/
TAILQ_HEAD(timer_pollers_head, spdk_poller) timer_pollers;
struct rte_ring *events;
/* Pointer to the per-socket g_spdk_event_mempool for this reactor. */
struct spdk_mempool *event_mempool;
uint64_t max_delay_us;
} __attribute__((aligned(64)));
static struct spdk_reactor g_reactors[RTE_MAX_LCORE];
static uint64_t g_reactor_mask = 0;
static int g_reactor_count = 0;
static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_INVALID;
static void spdk_reactor_construct(struct spdk_reactor *w, uint32_t lcore,
uint64_t max_delay_us);
static struct spdk_mempool *g_spdk_event_mempool[SPDK_MAX_SOCKET];
/** \file
*/
static struct spdk_reactor *
spdk_reactor_get(uint32_t lcore)
{
struct spdk_reactor *reactor;
reactor = &g_reactors[lcore];
return reactor;
}
struct spdk_event *
spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
{
struct spdk_event *event = NULL;
struct spdk_reactor *reactor = spdk_reactor_get(lcore);
event = spdk_mempool_get(reactor->event_mempool);
if (event == NULL) {
assert(false);
return NULL;
}
event->lcore = lcore;
event->fn = fn;
event->arg1 = arg1;
event->arg2 = arg2;
return event;
}
void
spdk_event_call(struct spdk_event *event)
{
int rc;
struct spdk_reactor *reactor;
reactor = spdk_reactor_get(event->lcore);
assert(reactor->events != NULL);
rc = rte_ring_mp_enqueue(reactor->events, event);
if (rc != 0) {
assert(false);
}
}
static inline uint32_t
_spdk_event_queue_run_batch(struct spdk_reactor *reactor)
{
unsigned count, i;
void *events[SPDK_EVENT_BATCH_SIZE];
#ifdef DEBUG
/*
* rte_ring_dequeue_burst() fills events and returns how many entries it wrote,
* so we will never actually read uninitialized data from events, but just to be sure
* (and to silence a static analyzer false positive), initialize the array to NULL pointers.
*/
memset(events, 0, sizeof(events));
#endif
count = rte_ring_sc_dequeue_burst(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
if (count == 0) {
return 0;
}
for (i = 0; i < count; i++) {
struct spdk_event *event = events[i];
assert(event != NULL);
event->fn(event->arg1, event->arg2);
}
spdk_mempool_put_bulk(reactor->event_mempool, events, count);
return count;
}
uint32_t
spdk_event_queue_run_batch(uint32_t lcore)
{
return _spdk_event_queue_run_batch(spdk_reactor_get(lcore));
}
/**
\brief Set current reactor thread name to "reactor <cpu #>".
This makes the reactor threads distinguishable in top and gdb.
*/
static void set_reactor_thread_name(uint32_t lcore)
{
char thread_name[16];
snprintf(thread_name, sizeof(thread_name), "reactor_%u", lcore);
#if defined(__linux__)
prctl(PR_SET_NAME, thread_name, 0, 0, 0);
#elif defined(__FreeBSD__)
pthread_set_name_np(pthread_self(), thread_name);
#else
#error missing platform support for thread name
#endif
}
static void
spdk_poller_insert_timer(struct spdk_reactor *reactor, struct spdk_poller *poller, uint64_t now)
{
struct spdk_poller *iter;
uint64_t next_run_tick;
next_run_tick = now + poller->period_ticks;
poller->next_run_tick = next_run_tick;
/*
* Insert poller in the reactor's timer_pollers list in sorted order by next scheduled
* run time.
*/
TAILQ_FOREACH_REVERSE(iter, &reactor->timer_pollers, timer_pollers_head, tailq) {
if (iter->next_run_tick <= next_run_tick) {
TAILQ_INSERT_AFTER(&reactor->timer_pollers, iter, poller, tailq);
return;
}
}
/* No earlier pollers were found, so this poller must be the new head */
TAILQ_INSERT_HEAD(&reactor->timer_pollers, poller, tailq);
}
static void
_spdk_poller_unregister_complete(struct spdk_poller *poller)
{
if (poller->unregister_complete_event) {
spdk_event_call(poller->unregister_complete_event);
}
free(poller);
}
/**
\brief This is the main function of the reactor thread.
\code
while (1)
if (events to run)
dequeue and run a batch of events
if (active pollers)
run the first poller in the list and move it to the back
if (first timer poller has expired)
run the first timer poller and reinsert it in the timer list
if (idle for at least SPDK_REACTOR_SPIN_TIME_US)
sleep until next timer poller is scheduled to expire
\endcode
*/
static int
_spdk_reactor_run(void *arg)
{
struct spdk_reactor *reactor = arg;
struct spdk_poller *poller;
uint32_t event_count;
uint64_t idle_started, now;
uint64_t spin_cycles, sleep_cycles;
uint32_t sleep_us;
uint32_t timer_poll_count;
spdk_allocate_thread();
set_reactor_thread_name(reactor->lcore);
SPDK_NOTICELOG("Reactor started on core %u on socket %u\n", reactor->lcore,
reactor->socket_id);
spin_cycles = SPDK_REACTOR_SPIN_TIME_US * spdk_get_ticks_hz() / 1000000ULL;
sleep_cycles = reactor->max_delay_us * spdk_get_ticks_hz() / 1000000ULL;
idle_started = 0;
timer_poll_count = 0;
while (1) {
bool took_action = false;
event_count = _spdk_event_queue_run_batch(reactor);
if (event_count > 0) {
took_action = true;
}
poller = TAILQ_FIRST(&reactor->active_pollers);
if (poller) {
TAILQ_REMOVE(&reactor->active_pollers, poller, tailq);
poller->state = SPDK_POLLER_STATE_RUNNING;
poller->fn(poller->arg);
if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
_spdk_poller_unregister_complete(poller);
} else {
poller->state = SPDK_POLLER_STATE_WAITING;
TAILQ_INSERT_TAIL(&reactor->active_pollers, poller, tailq);
}
took_action = true;
}
if (timer_poll_count >= SPDK_TIMER_POLL_ITERATIONS) {
poller = TAILQ_FIRST(&reactor->timer_pollers);
if (poller) {
now = spdk_get_ticks();
if (now >= poller->next_run_tick) {
TAILQ_REMOVE(&reactor->timer_pollers, poller, tailq);
poller->state = SPDK_POLLER_STATE_RUNNING;
poller->fn(poller->arg);
if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
_spdk_poller_unregister_complete(poller);
} else {
poller->state = SPDK_POLLER_STATE_WAITING;
spdk_poller_insert_timer(reactor, poller, now);
}
took_action = true;
}
}
timer_poll_count = 0;
} else {
timer_poll_count++;
}
if (took_action) {
/* We were busy this loop iteration. Reset the idle timer. */
idle_started = 0;
} else if (idle_started == 0) {
/* We were previously busy, but this loop we took no actions. */
idle_started = spdk_get_ticks();
}
/* Determine if the thread can sleep */
if (sleep_cycles && idle_started) {
now = spdk_get_ticks();
if (now >= (idle_started + spin_cycles)) {
sleep_us = reactor->max_delay_us;
poller = TAILQ_FIRST(&reactor->timer_pollers);
if (poller) {
/* There are timers registered, so don't sleep beyond
* when the next timer should fire */
if (poller->next_run_tick < (now + sleep_cycles)) {
if (poller->next_run_tick <= now) {
sleep_us = 0;
} else {
sleep_us = ((poller->next_run_tick - now) * 1000000ULL) / spdk_get_ticks_hz();
}
}
}
if (sleep_us > 0) {
usleep(sleep_us);
}
/* After sleeping, always poll for timers */
timer_poll_count = SPDK_TIMER_POLL_ITERATIONS;
}
}
if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
break;
}
}
spdk_free_thread();
return 0;
}
static void
spdk_reactor_construct(struct spdk_reactor *reactor, uint32_t lcore, uint64_t max_delay_us)
{
char ring_name[64];
reactor->lcore = lcore;
reactor->socket_id = rte_lcore_to_socket_id(lcore);
assert(reactor->socket_id < SPDK_MAX_SOCKET);
reactor->max_delay_us = max_delay_us;
TAILQ_INIT(&reactor->active_pollers);
TAILQ_INIT(&reactor->timer_pollers);
snprintf(ring_name, sizeof(ring_name) - 1, "spdk_event_queue_%u", lcore);
reactor->events =
rte_ring_create(ring_name, 65536, reactor->socket_id, RING_F_SC_DEQ);
assert(reactor->events != NULL);
reactor->event_mempool = g_spdk_event_mempool[reactor->socket_id];
}
static void
spdk_reactor_start(struct spdk_reactor *reactor)
{
if (reactor->lcore != rte_get_master_lcore()) {
switch (rte_eal_get_lcore_state(reactor->lcore)) {
case FINISHED:
rte_eal_wait_lcore(reactor->lcore);
/* drop through */
case WAIT:
rte_eal_remote_launch(_spdk_reactor_run, (void *)reactor, reactor->lcore);
break;
case RUNNING:
printf("Something already running on lcore %d\n", reactor->lcore);
break;
}
} else {
_spdk_reactor_run(reactor);
}
}
int
spdk_app_get_core_count(void)
{
return g_reactor_count;
}
uint32_t
spdk_app_get_current_core(void)
{
return rte_lcore_id();
}
int
spdk_app_parse_core_mask(const char *mask, uint64_t *cpumask)
{
unsigned int i;
char *end;
if (mask == NULL || cpumask == NULL) {
return -1;
}
errno = 0;
*cpumask = strtoull(mask, &end, 16);
if (*end != '\0' || errno) {
return -1;
}
for (i = 0; i < RTE_MAX_LCORE && i < 64; i++) {
if ((*cpumask & (1ULL << i)) && !rte_lcore_is_enabled(i)) {
*cpumask &= ~(1ULL << i);
}
}
return 0;
}
static int
spdk_reactor_parse_mask(const char *mask)
{
int i;
int ret = 0;
uint32_t master_core = rte_get_master_lcore();
if (g_reactor_state >= SPDK_REACTOR_STATE_INITIALIZED) {
SPDK_ERRLOG("cannot set reactor mask after application has started\n");
return -1;
}
g_reactor_mask = 0;
if (mask == NULL) {
/* No mask specified so use the same mask as DPDK. */
RTE_LCORE_FOREACH(i) {
g_reactor_mask |= (1ULL << i);
}
} else {
ret = spdk_app_parse_core_mask(mask, &g_reactor_mask);
if (ret != 0) {
SPDK_ERRLOG("reactor mask %s specified on command line "
"is invalid\n", mask);
return ret;
}
if (!(g_reactor_mask & (1ULL << master_core))) {
SPDK_ERRLOG("master_core %d must be set in core mask\n", master_core);
return -1;
}
}
return 0;
}
uint64_t
spdk_app_get_core_mask(void)
{
return g_reactor_mask;
}
static uint64_t
spdk_reactor_get_socket_mask(void)
{
int i;
uint32_t socket_id;
uint64_t socket_info = 0;
RTE_LCORE_FOREACH(i) {
if (((1ULL << i) & g_reactor_mask)) {
socket_id = rte_lcore_to_socket_id(i);
socket_info |= (1ULL << socket_id);
}
}
return socket_info;
}
void
spdk_reactors_start(void)
{
struct spdk_reactor *reactor;
uint32_t i;
assert(rte_get_master_lcore() == rte_lcore_id());
g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
RTE_LCORE_FOREACH_SLAVE(i) {
if (((1ULL << i) & spdk_app_get_core_mask())) {
reactor = spdk_reactor_get(i);
spdk_reactor_start(reactor);
}
}
/* Start the master reactor */
reactor = spdk_reactor_get(rte_get_master_lcore());
spdk_reactor_start(reactor);
rte_eal_mp_wait_lcore();
g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
}
void spdk_reactors_stop(void)
{
g_reactor_state = SPDK_REACTOR_STATE_EXITING;
}
int
spdk_reactors_init(const char *mask, unsigned int max_delay_us)
{
uint32_t i, j;
int rc;
struct spdk_reactor *reactor;
uint64_t socket_mask = 0x0;
uint8_t socket_count = 0;
char mempool_name[32];
rc = spdk_reactor_parse_mask(mask);
if (rc < 0) {
return rc;
}
printf("Occupied cpu core mask is 0x%lx\n", spdk_app_get_core_mask());
socket_mask = spdk_reactor_get_socket_mask();
printf("Occupied cpu socket mask is 0x%lx\n", socket_mask);
for (i = 0; i < SPDK_MAX_SOCKET; i++) {
if ((1ULL << i) & socket_mask) {
socket_count++;
}
}
if (socket_count == 0) {
printf("No sockets occupied (internal error)\n");
return -1;
}
for (i = 0; i < SPDK_MAX_SOCKET; i++) {
if ((1ULL << i) & socket_mask) {
snprintf(mempool_name, sizeof(mempool_name), "spdk_event_mempool_%d", i);
g_spdk_event_mempool[i] = spdk_mempool_create(mempool_name,
(262144 / socket_count),
sizeof(struct spdk_event), -1, i);
if (g_spdk_event_mempool[i] == NULL) {
SPDK_ERRLOG("spdk_event_mempool creation failed on socket %d\n", i);
/*
* Instead of failing the operation directly, try to create
* the mempool on any available sockets in the case that
* memory is not evenly installed on all sockets. If still
* fails, free all allocated memory and exits.
*/
g_spdk_event_mempool[i] = spdk_mempool_create(
mempool_name,
(262144 / socket_count),
sizeof(struct spdk_event), -1,
SPDK_ENV_SOCKET_ID_ANY);
if (g_spdk_event_mempool[i] == NULL) {
for (j = i - 1; j < i; j--) {
if (g_spdk_event_mempool[j] != NULL) {
spdk_mempool_free(g_spdk_event_mempool[j]);
}
}
SPDK_ERRLOG("spdk_event_mempool creation failed\n");
return -1;
}
}
}
}
RTE_LCORE_FOREACH(i) {
if (((1ULL << i) & spdk_app_get_core_mask())) {
reactor = spdk_reactor_get(i);
spdk_reactor_construct(reactor, i, max_delay_us);
g_reactor_count++;
}
}
g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
return rc;
}
int
spdk_reactors_fini(void)
{
uint32_t i;
uint64_t socket_mask;
struct spdk_reactor *reactor;
RTE_LCORE_FOREACH(i) {
if (((1ULL << i) & spdk_app_get_core_mask())) {
reactor = spdk_reactor_get(i);
if (reactor->events != NULL) {
rte_ring_free(reactor->events);
}
}
}
socket_mask = spdk_reactor_get_socket_mask();
for (i = 0; i < SPDK_MAX_SOCKET; i++) {
if ((1ULL << i) & socket_mask && g_spdk_event_mempool[i] != NULL) {
spdk_mempool_free(g_spdk_event_mempool[i]);
}
}
return 0;
}
static void
_spdk_poller_register(struct spdk_reactor *reactor, struct spdk_poller *poller)
{
if (poller->period_ticks) {
spdk_poller_insert_timer(reactor, poller, spdk_get_ticks());
} else {
TAILQ_INSERT_TAIL(&reactor->active_pollers, poller, tailq);
}
}
static void
_spdk_event_add_poller(void *arg1, void *arg2)
{
struct spdk_reactor *reactor = arg1;
struct spdk_poller *poller = arg2;
_spdk_poller_register(reactor, poller);
}
void
spdk_poller_register(struct spdk_poller **ppoller, spdk_poller_fn fn, void *arg,
uint32_t lcore, uint64_t period_microseconds)
{
struct spdk_poller *poller;
struct spdk_reactor *reactor;
poller = calloc(1, sizeof(*poller));
if (poller == NULL) {
SPDK_ERRLOG("Poller memory allocation failed\n");
abort();
}
poller->lcore = lcore;
poller->state = SPDK_POLLER_STATE_WAITING;
poller->fn = fn;
poller->arg = arg;
if (period_microseconds) {
poller->period_ticks = (spdk_get_ticks_hz() * period_microseconds) / 1000000ULL;
} else {
poller->period_ticks = 0;
}
if (*ppoller != NULL) {
SPDK_ERRLOG("Attempted reuse of poller pointer\n");
abort();
}
if (lcore >= RTE_MAX_LCORE) {
SPDK_ERRLOG("Attempted use lcore %u larger than max lcore %u\n",
lcore, RTE_MAX_LCORE - 1);
abort();
}
*ppoller = poller;
reactor = spdk_reactor_get(lcore);
if (lcore == spdk_app_get_current_core()) {
/*
* The poller is registered to run on the current core, so call the add function
* directly.
*/
_spdk_poller_register(reactor, poller);
} else {
/*
* The poller is registered to run on a different core.
* Schedule an event to run on the poller's core that will add the poller.
*/
spdk_event_call(spdk_event_allocate(lcore, _spdk_event_add_poller, reactor, poller));
}
}
static void
_spdk_poller_unregister(struct spdk_reactor *reactor, struct spdk_poller *poller,
struct spdk_event *next)
{
assert(poller->lcore == reactor->lcore);
assert(poller->lcore == spdk_app_get_current_core());
poller->unregister_complete_event = next;
if (poller->state == SPDK_POLLER_STATE_RUNNING) {
/*
* We are being called from the poller_fn, so set the state to unregistered
* and let the reactor loop free the poller.
*/
poller->state = SPDK_POLLER_STATE_UNREGISTERED;
} else {
/* Poller is not running currently, so just free it. */
if (poller->period_ticks) {
TAILQ_REMOVE(&reactor->timer_pollers, poller, tailq);
} else {
TAILQ_REMOVE(&reactor->active_pollers, poller, tailq);
}
_spdk_poller_unregister_complete(poller);
}
}
static void
_spdk_event_remove_poller(void *arg1, void *arg2)
{
struct spdk_poller *poller = arg1;
struct spdk_reactor *reactor = spdk_reactor_get(poller->lcore);
struct spdk_event *next = arg2;
_spdk_poller_unregister(reactor, poller, next);
}
void
spdk_poller_unregister(struct spdk_poller **ppoller,
struct spdk_event *complete)
{
struct spdk_poller *poller;
uint32_t lcore;
poller = *ppoller;
*ppoller = NULL;
if (poller == NULL) {
if (complete) {
spdk_event_call(complete);
}
return;
}
lcore = poller->lcore;
if (lcore == spdk_app_get_current_core()) {
/*
* The poller is registered on the current core, so call the remove function
* directly.
*/
_spdk_poller_unregister(spdk_reactor_get(lcore), poller, complete);
} else {
/*
* The poller is registered on a different core.
* Schedule an event to run on the poller's core that will remove the poller.
*/
spdk_event_call(spdk_event_allocate(lcore, _spdk_event_remove_poller, poller, complete));
}
}