eal: register non-EAL threads as lcores

DPDK allows calling some part of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) have their own thread management but still
want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads, which are potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate them with a
free lcore with a new NON_EAL role.
This role denotes lcores that do not run the DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and related functions.

Multiprocess is not supported as the need for cohabitation with this new
feature is unclear at the moment.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
This commit is contained in:
David Marchand 2020-07-06 22:52:30 +02:00
parent a837d5c598
commit 5c307ba2a5
20 changed files with 385 additions and 23 deletions

View File

@ -182,6 +182,7 @@ F: app/test/test_cycles.c
F: app/test/test_debug.c
F: app/test/test_eal*
F: app/test/test_errno.c
F: app/test/test_lcores.c
F: app/test/test_logs.c
F: app/test/test_memcpy*
F: app/test/test_per_lcore.c

View File

@ -98,6 +98,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
endif
SRCS-y += test_rwlock.c
SRCS-y += test_lcores.c
SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c

View File

@ -62,6 +62,12 @@ parallel_test_list = [
"Func": rwlock_autotest,
"Report": None,
},
{
"Name": "Lcores autotest",
"Command": "lcores_autotest",
"Func": default_autotest,
"Report": None,
},
{
"Name": "Logs autotest",
"Command": "logs_autotest",

View File

@ -67,6 +67,7 @@ test_sources = files('commands.c',
'test_ipsec_perf.c',
'test_kni.c',
'test_kvargs.c',
'test_lcores.c',
'test_logs.c',
'test_lpm.c',
'test_lpm6.c',
@ -207,6 +208,7 @@ fast_tests = [
['hash_autotest', true],
['interrupt_autotest', true],
['ipfrag_autotest', false],
['lcores_autotest', true],
['logs_autotest', true],
['lpm_autotest', true],
['lpm6_autotest', true],

143
app/test/test_lcores.c Normal file
View File

@ -0,0 +1,143 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2020 Red Hat, Inc.
*/
#include <pthread.h>
#include <sched.h>
#include <string.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include "test.h"
/* Bookkeeping shared between the control thread and one non-EAL test thread. */
struct thread_context {
/* Set to INIT by the control thread before creation; the thread ends in ERROR or DONE. */
enum { INIT, ERROR, DONE } state;
/* True when the thread is expected to remain at LCORE_ID_ANY after trying to register. */
bool lcore_id_any;
/* Handle filled in by pthread_create() and later passed to pthread_join(). */
pthread_t id;
/*
 * Counter shared by all test threads: each thread increments it once its
 * registration attempt is over, then polls it until the control thread
 * resets it to 0.
 */
unsigned int *registered_count;
};
/*
 * Body of a non-EAL test thread.
 *
 * The thread checks that it starts without a lcore, tries to register
 * itself, reports to the control thread through the shared counter, waits
 * for the control thread to reset that counter to 0, then unregisters and
 * checks that its lcore was released.
 *
 * @param arg
 *   Pointer to the struct thread_context of this thread.
 * @return
 *   Always NULL; the outcome is reported through the context state
 *   (ERROR on any failed check, DONE otherwise).
 */
static void *thread_loop(void *arg)
{
	struct thread_context *t = arg;
	unsigned int lcore_id;

	/* A thread that never registered must not own a lcore. */
	lcore_id = rte_lcore_id();
	if (lcore_id != LCORE_ID_ANY) {
		printf("Error: incorrect lcore id for new thread %u\n", lcore_id);
		t->state = ERROR;
	}
	/* Failure is only a warning here: the expectation check comes next. */
	if (rte_thread_register() < 0)
		printf("Warning: could not register new thread (this might be expected during this test), reason %s\n",
			rte_strerror(rte_errno));
	lcore_id = rte_lcore_id();
	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
		printf("Error: could not register new thread, got %u while %sexpecting %u\n",
			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
		t->state = ERROR;
	}
	/* Report register happened to the control thread. */
	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
	/*
	 * Wait for release from the control thread.
	 * Yield while polling: there may be more test threads than CPUs, and
	 * a pure busy loop would starve the control thread we are waiting on.
	 */
	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
		sched_yield();
	rte_thread_unregister();
	/* Unregistering must bring the thread back to LCORE_ID_ANY. */
	lcore_id = rte_lcore_id();
	if (lcore_id != LCORE_ID_ANY) {
		printf("Error: could not unregister new thread, %u still assigned\n",
			lcore_id);
		t->state = ERROR;
	}
	if (t->state != ERROR)
		t->state = DONE;
	return NULL;
}
/*
 * Register as many non-EAL threads as there are free lcores, then check that
 * one extra thread is refused a lcore.
 *
 * @param eal_threads_count
 *   Number of lcores already in use. The only caller, test_lcores(), rejects
 *   0, which keeps the index of the extra thread inside thread_contexts[].
 * @return
 *   0 if every created thread ended in the DONE state, -1 otherwise.
 */
static int
test_non_eal_lcores(unsigned int eal_threads_count)
{
	struct thread_context thread_contexts[RTE_MAX_LCORE];
	unsigned int non_eal_threads_count;
	unsigned int registered_count;
	struct thread_context *t;
	unsigned int i;
	int ret;

	non_eal_threads_count = 0;
	registered_count = 0;
	/* Try to create as many threads as possible. */
	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
		t = &thread_contexts[i];
		t->state = INIT;
		t->registered_count = &registered_count;
		t->lcore_id_any = false;
		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
			break;
		non_eal_threads_count++;
	}
	printf("non-EAL threads count: %u\n", non_eal_threads_count);
	/*
	 * Wait for all non-EAL threads to register.
	 * Yield while polling so that the threads we are waiting for can run
	 * even when there are fewer CPUs than threads.
	 */
	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
			non_eal_threads_count)
		sched_yield();
	/*
	 * If we managed to create the max number of threads, every lcore is
	 * now taken: try to create one more thread, which must be left with
	 * LCORE_ID_ANY. This allows one more check.
	 */
	if (eal_threads_count + non_eal_threads_count >= RTE_MAX_LCORE) {
		t = &thread_contexts[non_eal_threads_count];
		t->state = INIT;
		t->registered_count = &registered_count;
		t->lcore_id_any = true;
		if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
			non_eal_threads_count++;
			printf("non-EAL threads count: %u\n", non_eal_threads_count);
			while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
					non_eal_threads_count)
				sched_yield();
		}
	}
	/* Release all threads, and check their states. */
	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
	ret = 0;
	for (i = 0; i < non_eal_threads_count; i++) {
		t = &thread_contexts[i];
		pthread_join(t->id, NULL);
		if (t->state != DONE)
			ret = -1;
	}
	return ret;
}
/*
 * Entry point of the lcores autotest.
 *
 * Counts the lcores whose role is not ROLE_OFF, then runs the non-EAL
 * thread registration test with that count.
 *
 * @return
 *   TEST_SUCCESS, or TEST_FAILED if no lcore is in use or the registration
 *   test reports an error.
 */
static int
test_lcores(void)
{
	unsigned int in_use = 0;
	unsigned int lcore;

	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++)
		in_use += rte_lcore_has_role(lcore, ROLE_OFF) ? 0 : 1;

	/* The thread running this test holds a lcore, so 0 is impossible. */
	if (in_use == 0) {
		printf("Error: something is broken, no EAL thread detected.\n");
		return TEST_FAILED;
	}
	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", in_use,
		RTE_MAX_LCORE);

	return test_non_eal_lcores(in_use) < 0 ? TEST_FAILED : TEST_SUCCESS;
}
REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);

View File

@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
#. Configuration issue isolation
* Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
SERVICE. Check performance functions are mapped to run on the cores.
* Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
SERVICE and NON_EAL. Check performance functions are mapped to run on the
cores.
* For high-performance execution logic ensure running it on correct NUMA
and non-master core.

View File

@ -564,9 +564,13 @@ It's also compatible with the pattern of corelist('-l') option.
non-EAL pthread support
~~~~~~~~~~~~~~~~~~~~~~~
It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
There are two kinds of non-EAL pthreads:
- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
- an unregistered non-EAL pthread whose *_lcore_id* is LCORE_ID_ANY.
For unregistered non-EAL pthreads (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
All these impacts are mentioned in :ref:`known_issue_label` section.
@ -613,9 +617,9 @@ Known Issues
+ rte_mempool
The rte_mempool uses a per-lcore cache inside the mempool.
For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+ rte_ring
@ -660,15 +664,15 @@ Known Issues
+ rte_timer
Running ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
Running ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+ rte_log
In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+ misc
The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
cgroup control
~~~~~~~~~~~~~~

View File

@ -103,7 +103,7 @@ The maximum size of the cache is static and is defined at compilation time (CONF
Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
Mempool Handlers
------------------------

View File

@ -56,6 +56,12 @@ New Features
Also, make sure to start the actual text at the margin.
=========================================================
* **Added non-EAL threads registration API.**
Added a new API to register non-EAL threads as lcores. Applications can use
it to make their threads known to DPDK, which avoids the performance
limitations that previously applied to non-EAL threads.
* **rte_*mb APIs are updated to use DMB instruction for ARMv8.**
ARMv8 memory model has been strengthened to require other-multi-copy

View File

@ -6,13 +6,15 @@
#include <limits.h>
#include <string.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_spinlock.h>
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_thread.h"
@ -228,3 +230,38 @@ rte_socket_id_by_idx(unsigned int idx)
}
return config->numa_nodes[idx];
}
/*
 * Serializes the lcore_role[] and lcore_count updates done when a lcore is
 * allocated to, or released by, a non-EAL thread.
 */
static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
/*
 * Reserve the first lcore whose role is ROLE_OFF for a non-EAL thread.
 *
 * On success the lcore role becomes ROLE_NON_EAL and the lcore count is
 * incremented; both happen under lcore_lock.
 *
 * Returns the reserved lcore id, or RTE_MAX_LCORE if none was available.
 */
unsigned int
eal_lcore_non_eal_allocate(void)
{
	struct rte_config *cfg = rte_eal_get_configuration();
	unsigned int lcore_id = 0;

	rte_spinlock_lock(&lcore_lock);
	/* Skip every lcore that is already in use. */
	while (lcore_id < RTE_MAX_LCORE &&
			cfg->lcore_role[lcore_id] != ROLE_OFF)
		lcore_id++;
	if (lcore_id < RTE_MAX_LCORE) {
		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
		cfg->lcore_count++;
	} else {
		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
	}
	rte_spinlock_unlock(&lcore_lock);

	return lcore_id;
}
/*
 * Give back a lcore previously reserved for a non-EAL thread.
 *
 * Only a lcore whose role is ROLE_NON_EAL is touched: its role goes back to
 * ROLE_OFF and the lcore count is decremented, under lcore_lock. Any other
 * role is left untouched.
 */
void
eal_lcore_non_eal_release(unsigned int lcore_id)
{
	struct rte_config *cfg = rte_eal_get_configuration();

	rte_spinlock_lock(&lcore_lock);
	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
		goto unlock;
	cfg->lcore_role[lcore_id] = ROLE_OFF;
	cfg->lcore_count--;
unlock:
	rte_spinlock_unlock(&lcore_lock);
}

View File

@ -29,6 +29,7 @@
#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_filesystem.h"
#include "eal_internal_cfg.h"
@ -1232,3 +1233,43 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return mp_send(msg, peer, MP_REP);
}
/* Internally, the status of the mp feature is represented as a three-state:
 * - "unknown" as long as no secondary process attached to a primary process
 *   and there was no call to __rte_mp_disable yet,
 * - "enabled" as soon as a secondary process attaches to a primary process,
 * - "disabled" when a primary process successfully called __rte_mp_disable.
 *
 * set_mp_status() only ever changes the status away from "unknown".
 */
enum mp_status {
/* Neither enabled nor disabled yet; the only state set_mp_status() changes. */
MP_STATUS_UNKNOWN,
/* Requested through __rte_mp_disable(). */
MP_STATUS_DISABLED,
/* Requested through __rte_mp_enable(). */
MP_STATUS_ENABLED,
};
/*
 * Try to move the multi-process status stored in the memory configuration
 * from "unknown" to the requested status.
 *
 * Returns true if the status now is the requested one, either because the
 * transition succeeded or because it already had that value. Returns false
 * if the status had already been set to a different value.
 */
static bool
set_mp_status(enum mp_status status)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	uint8_t current = MP_STATUS_UNKNOWN;
	const uint8_t wanted = status;

	if (!__atomic_compare_exchange_n(&mcfg->mp_status, &current, wanted,
			false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		/* Already decided: succeed only if it matches the request. */
		return __atomic_load_n(&mcfg->mp_status,
				__ATOMIC_RELAXED) == wanted;
	}

	return true;
}
/*
 * Request that the multi-process feature be marked as disabled.
 * Returns true if the status is "disabled" after the call, false if it had
 * already been set to another value.
 */
bool
__rte_mp_disable(void)
{
return set_mp_status(MP_STATUS_DISABLED);
}
/*
 * Request that the multi-process feature be marked as enabled.
 * Returns true if the status is "enabled" after the call, false if it had
 * already been set to another value.
 */
bool
__rte_mp_enable(void)
{
return set_mp_status(MP_STATUS_ENABLED);
}

View File

@ -12,9 +12,10 @@
#include <assert.h>
#include <string.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_memory.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_trace_point.h>
#include "eal_internal_cfg.h"
@ -241,3 +242,49 @@ fail:
pthread_join(*thread, NULL);
return -ret;
}
/*
 * Register the calling non-EAL thread as a lcore.
 *
 * The per-thread state is initialised even when no lcore could be
 * allocated; in that case the thread is set up with LCORE_ID_ANY.
 *
 * Returns 0 on success. Returns -1 with rte_errno set to EINVAL if EAL
 * init is not complete or multiprocess is in use, or to ENOMEM if no lcore
 * is available.
 */
int
rte_thread_register(void)
{
	rte_cpuset_t affinity;
	unsigned int id;

	/* EAL init flushes all lcores, we can't register before. */
	if (eal_get_internal_configuration()->init_complete != 1) {
		RTE_LOG(DEBUG, EAL, "Called %s before EAL init.\n", __func__);
		rte_errno = EINVAL;
		return -1;
	}
	if (!__rte_mp_disable()) {
		RTE_LOG(ERR, EAL, "Multiprocess in use, registering non-EAL threads is not supported.\n");
		rte_errno = EINVAL;
		return -1;
	}

	/* Fall back to an empty CPU set if the affinity cannot be read. */
	if (pthread_getaffinity_np(pthread_self(), sizeof(affinity),
			&affinity) != 0)
		CPU_ZERO(&affinity);

	id = eal_lcore_non_eal_allocate();
	if (id >= RTE_MAX_LCORE) {
		/* No free lcore: initialise the thread as unregistered. */
		__rte_thread_init(LCORE_ID_ANY, &affinity);
		rte_errno = ENOMEM;
		return -1;
	}

	__rte_thread_init(id, &affinity);
	RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
		id);
	return 0;
}
/*
 * Unregister the calling thread.
 *
 * If the thread holds a lcore, that lcore is released before the per-thread
 * state is torn down. A thread without a lcore only gets its per-thread
 * state torn down.
 */
void
rte_thread_unregister(void)
{
	unsigned int id = rte_lcore_id();

	if (id == LCORE_ID_ANY) {
		/* Nothing was allocated for this thread. */
		__rte_thread_uninit();
		return;
	}

	eal_lcore_non_eal_release(id);
	__rte_thread_uninit();
	RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
		id);
}

View File

@ -41,6 +41,8 @@ struct rte_mem_config {
rte_rwlock_t memory_hotplug_lock;
/**< Indicates whether memory hotplug request is in progress. */
uint8_t mp_status; /**< Multiprocess status. */
/* memory segments and zones */
struct rte_fbarray memzones; /**< Memzone descriptors. */

View File

@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
*/
uint64_t get_tsc_freq_arch(void);
/**
* Allocate a free lcore to associate to a non-EAL thread.
*
* @return
* - the id of a lcore with role ROLE_NON_EAL on success.
* - RTE_MAX_LCORE if none was available.
*/
unsigned int eal_lcore_non_eal_allocate(void);
/**
* Release the lcore used by a non-EAL thread.
* Counterpart of eal_lcore_non_eal_allocate().
*
* @param lcore_id
* The lcore with role ROLE_NON_EAL to release.
*/
void eal_lcore_non_eal_release(unsigned int lcore_id);
/**
* Prepare physical memory mapping
* i.e. hugepages on Linux and
@ -699,6 +717,16 @@ eal_get_internal_configuration(void);
rte_usage_hook_t
eal_get_application_usage_hook(void);
/**
 * Mark primary process as not supporting multi-process.
 *
 * @return
 *   true if multi-process is marked as disabled after the call, false
 *   otherwise.
 */
bool __rte_mp_disable(void);
/**
 * Instruct primary process that a secondary process wants to attach.
 *
 * @return
 *   true if multi-process is marked as enabled after the call, false
 *   otherwise (callers treat false as a refused attachment).
 */
bool __rte_mp_enable(void);
/**
* Init per-lcore info in current thread.
*

View File

@ -400,6 +400,10 @@ rte_config_init(void)
}
if (rte_eal_config_reattach() < 0)
return -1;
if (!__rte_mp_enable()) {
RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
return -1;
}
eal_mcfg_update_internal();
break;
case RTE_PROC_AUTO:

View File

@ -31,6 +31,7 @@ enum rte_lcore_role_t {
ROLE_RTE,
ROLE_OFF,
ROLE_SERVICE,
ROLE_NON_EAL,
};
/**
@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
* to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
*
* @return
* Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
* Logical core ID (in EAL thread or registered non-EAL thread) or
* LCORE_ID_ANY (in unregistered non-EAL thread)
*/
static inline unsigned
rte_lcore_id(void)
@ -279,6 +281,30 @@ int rte_thread_setname(pthread_t id, const char *name);
__rte_experimental
int rte_thread_getname(pthread_t id, char *name, size_t len);
/**
 * Register current non-EAL thread as a lcore.
 *
 * @note This API is not compatible with the multi-process feature:
 * - if a primary process registers a non-EAL thread, then no secondary process
 *   will initialise.
 * - if a secondary process initialises successfully, trying to register a
 *   non-EAL thread from either primary or secondary processes will always end
 *   up with the thread getting LCORE_ID_ANY as lcore.
 *
 * @return
 *   On success, return 0; otherwise return -1 with rte_errno set:
 *   - EINVAL if called before EAL init completed or if multi-process is in
 *     use,
 *   - ENOMEM if no lcore is available.
 */
__rte_experimental
int
rte_thread_register(void);
/**
* Unregister current thread and release lcore if one was associated.
*/
__rte_experimental
void
rte_thread_unregister(void);
/**
* Create a control thread.
*

View File

@ -492,6 +492,10 @@ rte_config_init(void)
}
if (rte_eal_config_reattach() < 0)
return -1;
if (!__rte_mp_enable()) {
RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
return -1;
}
eal_mcfg_update_internal();
break;
case RTE_PROC_AUTO:

View File

@ -396,6 +396,8 @@ EXPERIMENTAL {
# added in 20.08
rte_eal_vfio_get_vf_token;
rte_thread_register;
rte_thread_unregister;
};
INTERNAL {

View File

@ -66,6 +66,12 @@ eal_proc_type_detect(void)
return ptype;
}
/*
 * Stub variant of __rte_mp_disable(): performs no status bookkeeping and
 * always reports success.
 */
bool
__rte_mp_disable(void)
{
return true;
}
/* display usage */
static void
eal_usage(const char *prgname)

View File

@ -28,9 +28,9 @@
* rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
* thread due to the internal per-lcore cache. Due to the lack of caching,
* rte_mempool_get() or rte_mempool_put() performance will suffer when called
* by non-EAL threads. Instead, non-EAL threads should call
* rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
* created with rte_mempool_cache_create().
* by unregistered non-EAL threads. Instead, unregistered non-EAL threads
* should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
* user cache created with rte_mempool_cache_create().
*/
#include <stdio.h>
@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
/**
* Create a user-owned mempool cache.
*
* This can be used by non-EAL threads to enable caching when they
* This can be used by unregistered non-EAL threads to enable caching when they
* interact with a mempool.
*
* @param size
@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
* @param lcore_id
* The logical core id.
* @return
* A pointer to the mempool cache or NULL if disabled or non-EAL thread.
* A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
* thread.
*/
static __rte_always_inline struct rte_mempool_cache *
rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)