eal: get/set thread affinity per thread identifier

Implement functions for getting and setting thread affinity.
Threads can be pinned to specific cores by setting their
CPU affinity.

Windows error codes are translated to errno-style error codes.
The possible return values are chosen to provide as much
semantic compatibility between platforms as possible.

Note: convert_cpuset_to_affinity() has the limitation that all CPUs
of the set must belong to the same processor group.
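
For illustration, a minimal usage sketch of the new API (not part of
the patch): pin the calling thread to CPU 0, then read the affinity
back. Both calls return 0 on success or a positive errno-style value
on failure, on all platforms.

#include <rte_thread.h>

static int
pin_self_to_cpu0(void)
{
	rte_cpuset_t cpuset;
	int ret;

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);

	ret = rte_thread_set_affinity_by_id(rte_thread_self(), &cpuset);
	if (ret != 0)
		return ret;

	return rte_thread_get_affinity_by_id(rte_thread_self(), &cpuset);
}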

Signed-off-by: Narcisa Vasile <navasile@linux.microsoft.com>
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
commit b70a9b7886 (parent 56539289b8)
Tyler Retzlaff, 2022-05-12 06:14:29 -07:00; committed by David Marchand
7 changed files with 386 additions and 48 deletions


@@ -49,6 +49,48 @@ rte_thread_t rte_thread_self(void);
#ifdef RTE_HAS_CPUSET
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* Set the affinity of thread 'thread_id' to the cpu set
* specified by 'cpuset'.
*
* @param thread_id
* Id of the thread for which to set the affinity.
*
* @param cpuset
* Pointer to CPU affinity to set.
*
* @return
* On success, return 0.
* On failure, return a positive errno-style error number.
*/
__rte_experimental
int rte_thread_set_affinity_by_id(rte_thread_t thread_id,
const rte_cpuset_t *cpuset);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* Get the affinity of thread 'thread_id' and store it
* in 'cpuset'.
*
* @param thread_id
* Id of the thread for which to get the affinity.
*
* @param cpuset
* Pointer for storing the affinity value.
*
* @return
* On success, return 0.
* On failure, return a positive errno-style error number.
*/
__rte_experimental
int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
rte_cpuset_t *cpuset);
/**
* Set core affinity of the current thread.
* Support both EAL and non-EAL thread and update TLS.
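
Because failures are reported as positive errno-style values rather
than through rte_errno, a caller can pass the return value straight
to strerror(). A small sketch (helper name is illustrative):

#include <stdio.h>
#include <string.h>
#include <rte_thread.h>

static void
set_affinity_or_warn(rte_thread_t tid, const rte_cpuset_t *cpuset)
{
	int ret = rte_thread_set_affinity_by_id(tid, cpuset);

	if (ret != 0)
		fprintf(stderr, "set affinity failed: %s\n", strerror(ret));
}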


@@ -102,3 +102,19 @@ rte_thread_value_get(rte_thread_key key)
}
return pthread_getspecific(key->thread_index);
}
int
rte_thread_set_affinity_by_id(rte_thread_t thread_id,
const rte_cpuset_t *cpuset)
{
return pthread_setaffinity_np((pthread_t)thread_id.opaque_id,
sizeof(*cpuset), cpuset);
}
int
rte_thread_get_affinity_by_id(rte_thread_t thread_id,
rte_cpuset_t *cpuset)
{
return pthread_getaffinity_np((pthread_t)thread_id.opaque_id,
sizeof(*cpuset), cpuset);
}
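
On Unix, the wrappers can return the pthread results unchanged because
pthread_setaffinity_np() and pthread_getaffinity_np() already follow
the same convention: 0 on success, a positive errno value on failure.
For comparison, a standalone glibc sketch (not part of the patch):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Pin a pthread to CPU 0; returns 0 or a positive errno value. */
static int
pin_pthread_to_cpu0(pthread_t t)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	return pthread_setaffinity_np(t, sizeof(set), &set);
}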


@@ -422,7 +422,9 @@ EXPERIMENTAL {
rte_intr_type_set;
# added in 22.07
rte_thread_get_affinity_by_id;
rte_thread_self;
rte_thread_set_affinity_by_id;
};
INTERNAL {


@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
+ * Copyright (C) 2022 Microsoft Corporation
 */
-#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
@@ -27,13 +27,15 @@ struct socket_map {
};
struct cpu_map {
-	unsigned int socket_count;
	unsigned int lcore_count;
+	unsigned int socket_count;
+	unsigned int cpu_count;
	struct lcore_map lcores[RTE_MAX_LCORE];
	struct socket_map sockets[RTE_MAX_NUMA_NODES];
+	GROUP_AFFINITY cpus[CPU_SETSIZE];
};
-static struct cpu_map cpu_map = { 0 };
+static struct cpu_map cpu_map;
/* eal_create_cpu_map() is called before logging is initialized */
static void
@@ -47,13 +49,115 @@ log_early(const char *format, ...)
va_end(va);
}
static int
eal_query_group_affinity(void)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos = NULL;
unsigned int *cpu_count = &cpu_map.cpu_count;
DWORD infos_size = 0;
int ret = 0;
USHORT group_count;
KAFFINITY affinity;
USHORT group_no;
unsigned int i;
if (!GetLogicalProcessorInformationEx(RelationGroup, NULL,
&infos_size)) {
DWORD error = GetLastError();
if (error != ERROR_INSUFFICIENT_BUFFER) {
RTE_LOG(ERR, EAL, "Cannot get group information size, error %lu\n", error);
rte_errno = EINVAL;
ret = -1;
goto cleanup;
}
}
infos = malloc(infos_size);
if (infos == NULL) {
RTE_LOG(ERR, EAL, "Cannot allocate memory for NUMA node information\n");
rte_errno = ENOMEM;
ret = -1;
goto cleanup;
}
if (!GetLogicalProcessorInformationEx(RelationGroup, infos,
&infos_size)) {
RTE_LOG(ERR, EAL, "Cannot get group information, error %lu\n",
GetLastError());
rte_errno = EINVAL;
ret = -1;
goto cleanup;
}
*cpu_count = 0;
group_count = infos->Group.ActiveGroupCount;
for (group_no = 0; group_no < group_count; group_no++) {
affinity = infos->Group.GroupInfo[group_no].ActiveProcessorMask;
for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
if ((affinity & ((KAFFINITY)1 << i)) == 0)
continue;
cpu_map.cpus[*cpu_count].Group = group_no;
cpu_map.cpus[*cpu_count].Mask = (KAFFINITY)1 << i;
(*cpu_count)++;
}
}
cleanup:
free(infos);
return ret;
}
static bool
eal_create_lcore_map(const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info)
{
const unsigned int node_id = info->NumaNode.NodeNumber;
const GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
struct lcore_map *lcore;
unsigned int socket_id;
unsigned int i;
/*
* NUMA node may be reported multiple times if it includes
* cores from different processor groups, e. g. 80 cores
* of a physical processor comprise one NUMA node, but two
* processor groups, because group size is limited by 32/64.
*/
for (socket_id = 0; socket_id < cpu_map.socket_count; socket_id++)
if (cpu_map.sockets[socket_id].node_id == node_id)
break;
if (socket_id == cpu_map.socket_count) {
if (socket_id == RTE_DIM(cpu_map.sockets))
return true;
cpu_map.sockets[socket_id].node_id = node_id;
cpu_map.socket_count++;
}
for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
continue;
if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores))
return true;
lcore = &cpu_map.lcores[cpu_map.lcore_count];
lcore->socket_id = socket_id;
lcore->core_id = cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
cpu_map.lcore_count++;
}
return false;
}
int
eal_create_cpu_map(void)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos, *info;
DWORD infos_size;
bool full = false;
int ret = 0;
infos = NULL;
infos_size = 0;
if (!GetLogicalProcessorInformationEx(
RelationNumaNode, NULL, &infos_size)) {
@@ -62,7 +166,8 @@ eal_create_cpu_map(void)
			log_early("Cannot get NUMA node info size, error %lu\n",
				GetLastError());
			rte_errno = ENOMEM;
-			return -1;
+			ret = -1;
+			goto exit;
		}
	}
@@ -70,7 +175,8 @@
	if (infos == NULL) {
		log_early("Cannot allocate memory for NUMA node information\n");
		rte_errno = ENOMEM;
-		return -1;
+		ret = -1;
+		goto exit;
	}
if (!GetLogicalProcessorInformationEx(
@@ -78,57 +184,30 @@
		log_early("Cannot get NUMA node information, error %lu\n",
			GetLastError());
		rte_errno = EINVAL;
-		return -1;
+		ret = -1;
+		goto exit;
	}
	info = infos;
	while ((uint8_t *)info - (uint8_t *)infos < infos_size) {
-		unsigned int node_id = info->NumaNode.NodeNumber;
-		GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
-		struct lcore_map *lcore;
-		unsigned int i, socket_id;
-		/* NUMA node may be reported multiple times if it includes
-		 * cores from different processor groups, e. g. 80 cores
-		 * of a physical processor comprise one NUMA node, but two
-		 * processor groups, because group size is limited by 32/64.
-		 */
-		for (socket_id = 0; socket_id < cpu_map.socket_count;
-				socket_id++) {
-			if (cpu_map.sockets[socket_id].node_id == node_id)
-				break;
-		}
-		if (socket_id == cpu_map.socket_count) {
-			if (socket_id == RTE_DIM(cpu_map.sockets)) {
-				full = true;
-				goto exit;
-			}
-			cpu_map.sockets[socket_id].node_id = node_id;
-			cpu_map.socket_count++;
-		}
-		for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
-			if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
-				continue;
-			if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores)) {
-				full = true;
-				goto exit;
-			}
-			lcore = &cpu_map.lcores[cpu_map.lcore_count];
-			lcore->socket_id = socket_id;
-			lcore->core_id =
-				cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
-			cpu_map.lcore_count++;
+		if (eal_create_lcore_map(info)) {
+			full = true;
+			break;
		}
		info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(
			(uint8_t *)info + info->Size);
	}
+	if (eal_query_group_affinity()) {
+		/*
+		 * No need to set rte_errno here.
+		 * It is set by eal_query_group_affinity().
+		 */
+		ret = -1;
+		goto exit;
+	}
exit:
if (full) {
/* Not a fatal error, but important for troubleshooting. */
@@ -138,7 +217,7 @@ eal_create_cpu_map(void)
	free(infos);
-	return 0;
+	return ret;
}
int
@@ -164,3 +243,11 @@ eal_socket_numa_node(unsigned int socket_id)
{
return cpu_map.sockets[socket_id].node_id;
}
PGROUP_AFFINITY
eal_get_cpu_affinity(size_t cpu_index)
{
RTE_VERIFY(cpu_index < CPU_SETSIZE);
return &cpu_map.cpus[cpu_index];
}
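
The table above gives each flat cpu index, as used by rte_cpuset_t, a
(group, mask) pair, assigned in order over the active processors of
each group. When every logical processor is active, the pair follows
simple arithmetic; a hypothetical illustration (GROUP_SIZE stands in
for EAL_PROCESSOR_GROUP_SIZE):

#include <stdio.h>

#define GROUP_SIZE 64 /* EAL_PROCESSOR_GROUP_SIZE on a 64-bit build */

int
main(void)
{
	/* With all processors active, flat index 70 lands in group 1,
	 * bit 6, matching the order eal_query_group_affinity() uses
	 * to fill cpu_map.cpus.
	 */
	unsigned int cpu_index = 70;
	unsigned int group = cpu_index / GROUP_SIZE;
	unsigned long long mask = 1ULL << (cpu_index % GROUP_SIZE);

	printf("cpu %u -> group %u, mask 0x%llx\n", cpu_index, group, mask);
	return 0;
}

Systems with inactive processors break this arithmetic, because the
indices compact over active bits only; that is why the patch records
an explicit table instead of computing the pair on the fly.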


@@ -57,6 +57,16 @@ int eal_thread_create(pthread_t *thread, unsigned int lcore_id);
*/
unsigned int eal_socket_numa_node(unsigned int socket_id);
/**
* Get pointer to the group affinity for the cpu.
*
* @param cpu_index
* Index of the cpu, as it comes from rte_cpuset_t.
* @return
* Pointer to the group affinity for the cpu.
*/
PGROUP_AFFINITY eal_get_cpu_affinity(size_t cpu_index);
/**
* Schedule code for execution in the interrupt thread.
*


@@ -14,6 +14,8 @@
#include <stdlib.h>
#include <string.h>
#include <sched.h>
#ifdef __cplusplus
extern "C" {
#endif


@@ -1,16 +1,66 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2021 Mellanox Technologies, Ltd
* Copyright (C) 2022 Microsoft Corporation
*/
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_thread.h>
#include <rte_windows.h>
#include "eal_windows.h"
struct eal_tls_key {
DWORD thread_index;
};
/* Translates the most common error codes related to threads */
static int
thread_translate_win32_error(DWORD error)
{
switch (error) {
case ERROR_SUCCESS:
return 0;
case ERROR_INVALID_PARAMETER:
return EINVAL;
case ERROR_INVALID_HANDLE:
return EFAULT;
case ERROR_NOT_ENOUGH_MEMORY:
/* FALLTHROUGH */
case ERROR_NO_SYSTEM_RESOURCES:
return ENOMEM;
case ERROR_PRIVILEGE_NOT_HELD:
/* FALLTHROUGH */
case ERROR_ACCESS_DENIED:
return EACCES;
case ERROR_ALREADY_EXISTS:
return EEXIST;
case ERROR_POSSIBLE_DEADLOCK:
return EDEADLK;
case ERROR_INVALID_FUNCTION:
/* FALLTHROUGH */
case ERROR_CALL_NOT_IMPLEMENTED:
return ENOSYS;
}
return EINVAL;
}
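/* Example (illustrative, not in the patch): if OpenThread() fails for
 * lack of rights, GetLastError() yields ERROR_ACCESS_DENIED, which the
 * table above maps to EACCES, the same value POSIX callers would see.
 */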
static int
thread_log_last_error(const char *message)
{
DWORD error = GetLastError();
RTE_LOG(DEBUG, EAL, "GetLastError()=%lu: %s\n", error, message);
return thread_translate_win32_error(error);
}
rte_thread_t
rte_thread_self(void)
{
@@ -97,3 +147,132 @@ rte_thread_value_get(rte_thread_key key)
}
return output;
}
static int
convert_cpuset_to_affinity(const rte_cpuset_t *cpuset,
PGROUP_AFFINITY affinity)
{
int ret = 0;
PGROUP_AFFINITY cpu_affinity = NULL;
unsigned int cpu_idx;
memset(affinity, 0, sizeof(GROUP_AFFINITY));
affinity->Group = (USHORT)-1;
/* Check that all cpus of the set belong to the same processor group and
* accumulate thread affinity to be applied.
*/
for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
if (!CPU_ISSET(cpu_idx, cpuset))
continue;
cpu_affinity = eal_get_cpu_affinity(cpu_idx);
if (affinity->Group == (USHORT)-1) {
affinity->Group = cpu_affinity->Group;
} else if (affinity->Group != cpu_affinity->Group) {
RTE_LOG(DEBUG, EAL, "All processors must belong to the same processor group\n");
ret = ENOTSUP;
goto cleanup;
}
affinity->Mask |= cpu_affinity->Mask;
}
if (affinity->Mask == 0) {
ret = EINVAL;
goto cleanup;
}
cleanup:
return ret;
}
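/* Worked example (illustrative, not in the patch): for a cpuset holding
 * cpus 2 and 5, both recorded in group 0 with mask bits 2 and 5, the
 * loop above accumulates affinity->Group == 0 and
 * affinity->Mask == ((KAFFINITY)1 << 2) | ((KAFFINITY)1 << 5).
 * Adding a cpu from another group would instead fail with ENOTSUP.
 */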
int
rte_thread_set_affinity_by_id(rte_thread_t thread_id,
const rte_cpuset_t *cpuset)
{
int ret = 0;
GROUP_AFFINITY thread_affinity;
HANDLE thread_handle = NULL;
if (cpuset == NULL) {
ret = EINVAL;
goto cleanup;
}
ret = convert_cpuset_to_affinity(cpuset, &thread_affinity);
if (ret != 0) {
RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
goto cleanup;
}
thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE,
thread_id.opaque_id);
if (thread_handle == NULL) {
ret = thread_log_last_error("OpenThread()");
goto cleanup;
}
if (!SetThreadGroupAffinity(thread_handle, &thread_affinity, NULL)) {
ret = thread_log_last_error("SetThreadGroupAffinity()");
goto cleanup;
}
cleanup:
if (thread_handle != NULL) {
CloseHandle(thread_handle);
thread_handle = NULL;
}
return ret;
}
int
rte_thread_get_affinity_by_id(rte_thread_t thread_id,
rte_cpuset_t *cpuset)
{
HANDLE thread_handle = NULL;
PGROUP_AFFINITY cpu_affinity;
GROUP_AFFINITY thread_affinity;
unsigned int cpu_idx;
int ret = 0;
if (cpuset == NULL) {
ret = EINVAL;
goto cleanup;
}
thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE,
thread_id.opaque_id);
if (thread_handle == NULL) {
ret = thread_log_last_error("OpenThread()");
goto cleanup;
}
/* obtain previous thread affinity */
if (!GetThreadGroupAffinity(thread_handle, &thread_affinity)) {
ret = thread_log_last_error("GetThreadGroupAffinity()");
goto cleanup;
}
CPU_ZERO(cpuset);
/* Convert affinity to DPDK cpu set */
for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
cpu_affinity = eal_get_cpu_affinity(cpu_idx);
if ((cpu_affinity->Group == thread_affinity.Group) &&
((cpu_affinity->Mask & thread_affinity.Mask) != 0)) {
CPU_SET(cpu_idx, cpuset);
}
}
cleanup:
if (thread_handle != NULL) {
CloseHandle(thread_handle);
thread_handle = NULL;
}
return ret;
}
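
The processor-group limitation noted in the commit message follows
directly from convert_cpuset_to_affinity(): a cpuset spanning two
groups is rejected with ENOTSUP. A sketch (cpu indices are
illustrative and assume two fully populated 64-processor groups):

#include <rte_thread.h>

static int
try_cross_group_affinity(void)
{
	rte_cpuset_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);	/* a cpu in processor group 0 */
	CPU_SET(64, &cpuset);	/* a cpu in processor group 1 */

	/* Expected to fail with ENOTSUP on such a machine. */
	return rte_thread_set_affinity_by_id(rte_thread_self(), &cpuset);
}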