eal: add power management intrinsics

Add two new power management intrinsics, and provide an implementation
in eal/x86 based on the UMONITOR/UMWAIT instructions. The instructions
are implemented as raw byte opcodes because compiler support for them
is not yet widespread.

The power management intrinsics provide architecture-specific functions
to either wait until a specified TSC timestamp is reached, or wait until
either a TSC timestamp is reached or a monitored memory location is
written to. The monitor function also provides an optional comparison,
to avoid sleeping when the expected write has already happened and no
more writes are expected.

For more details, please refer to Intel(R) 64 and IA-32 Architectures
Software Developer's Manual, Volume 2.
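
As a usage sketch (hypothetical: the wake_flag variable and the
timeout_cycles value below are illustrative, not part of this patch),
a core could wait for another core's write roughly like this:

    /* sleep until wake_flag is written, until the TSC deadline passes,
     * or until some other event wakes the CPU; the sleep is skipped
     * entirely if wake_flag already reads as 1
     */
    rte_power_monitor(&wake_flag, 1, UINT64_MAX,
            rte_rdtsc() + timeout_cycles, sizeof(wake_flag));

Because the CPU may wake up for reasons other than the monitored write,
callers would typically re-check their condition in a loop.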

Signed-off-by: Liang Ma <liang.j.ma@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: David Christensen <drc@linux.vnet.ibm.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Ruifeng Wang <ruifeng.wang@arm.com>
Liang Ma 2020-10-27 14:59:02 +00:00 committed by Thomas Monjalon
parent e448a5a9ed
commit cda57d9388
8 changed files with 371 additions and 1 deletion


@@ -20,6 +20,7 @@ arch_headers = files(
'rte_pause_32.h',
'rte_pause_64.h',
'rte_pause.h',
'rte_power_intrinsics.h',
'rte_prefetch_32.h',
'rte_prefetch_64.h',
'rte_prefetch.h',


@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#ifndef _RTE_POWER_INTRINSIC_ARM_H_
#define _RTE_POWER_INTRINSIC_ARM_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_common.h>
#include "generic/rte_power_intrinsics.h"
/**
* This function is not supported on ARM.
*/
static inline void
rte_power_monitor(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz)
{
RTE_SET_USED(p);
RTE_SET_USED(expected_value);
RTE_SET_USED(value_mask);
RTE_SET_USED(tsc_timestamp);
RTE_SET_USED(data_sz);
}
/**
* This function is not supported on ARM.
*/
static inline void
rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz, rte_spinlock_t *lck)
{
RTE_SET_USED(p);
RTE_SET_USED(expected_value);
RTE_SET_USED(value_mask);
RTE_SET_USED(tsc_timestamp);
RTE_SET_USED(lck);
RTE_SET_USED(data_sz);
}
/**
* This function is not supported on ARM.
*/
static inline void
rte_power_pause(const uint64_t tsc_timestamp)
{
RTE_SET_USED(tsc_timestamp);
}
#ifdef __cplusplus
}
#endif
#endif /* _RTE_POWER_INTRINSIC_ARM_H_ */


@@ -0,0 +1,111 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#ifndef _RTE_POWER_INTRINSIC_H_
#define _RTE_POWER_INTRINSIC_H_
#include <inttypes.h>
#include <rte_compat.h>
#include <rte_spinlock.h>
/**
* @file
* Advanced power management operations.
*
* This file defines APIs for advanced power management,
* which are architecture-dependent.
*/
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Monitor a specific address for changes. This will cause the CPU to enter an
* architecture-defined optimized power state until either the specified
* memory address is written to, a certain TSC timestamp is reached, or other
* reasons cause the CPU to wake up.
*
* Additionally, an expected 64-bit value and a 64-bit mask are provided. If
* the mask is non-zero, the current value pointed to by `p` is masked and
* compared against the expected value; if they match, entering the optimized
* power state may be aborted.
*
* @param p
* Address to monitor for changes.
* @param expected_value
* Before attempting the monitoring, the `p` address may be read and compared
* against this value. If `value_mask` is zero, this step will be skipped.
* @param value_mask
* The 64-bit mask to use to extract current value from `p`.
* @param tsc_timestamp
* Maximum TSC timestamp to wait for. Note that the wait behavior is
* architecture-dependent.
* @param data_sz
* Data size (in bytes) that will be used to compare the expected value with
* the memory at `p`. Can be 1, 2, 4 or 8. Supplying any other value leads to
* undefined results.
*/
__rte_experimental
static inline void rte_power_monitor(const volatile void *p,
const uint64_t expected_value, const uint64_t value_mask,
const uint64_t tsc_timestamp, const uint8_t data_sz);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Monitor a specific address for changes. This will cause the CPU to enter an
* architecture-defined optimized power state until either the specified
* memory address is written to, a certain TSC timestamp is reached, or other
* reasons cause the CPU to wake up.
*
* Additionally, an expected 64-bit value and a 64-bit mask are provided. If
* the mask is non-zero, the current value pointed to by `p` is masked and
* compared against the expected value; if they match, entering the optimized
* power state may be aborted.
*
* This call will also unlock the supplied spinlock while the CPU is
* sleeping, and lock it again once the CPU wakes up.
*
* @param p
* Address to monitor for changes.
* @param expected_value
* Before attempting the monitoring, the `p` address may be read and compared
* against this value. If `value_mask` is zero, this step will be skipped.
* @param value_mask
* The 64-bit mask to use to extract current value from `p`.
* @param tsc_timestamp
* Maximum TSC timestamp to wait for. Note that the wait behavior is
* architecture-dependent.
* @param data_sz
* Data size (in bytes) that will be used to compare the expected value with
* the memory at `p`. Can be 1, 2, 4 or 8. Supplying any other value leads to
* undefined results.
* @param lck
* A spinlock that must be locked before entering the function, will be
* unlocked while the CPU is sleeping, and will be locked again once the CPU
* wakes up.
*/
__rte_experimental
static inline void rte_power_monitor_sync(const volatile void *p,
const uint64_t expected_value, const uint64_t value_mask,
const uint64_t tsc_timestamp, const uint8_t data_sz,
rte_spinlock_t *lck);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Enter an architecture-defined optimized power state until a certain TSC
* timestamp is reached.
*
* @param tsc_timestamp
* Maximum TSC timestamp to wait for. Note that the wait behavior is
* architecture-dependent.
*/
__rte_experimental
static inline void rte_power_pause(const uint64_t tsc_timestamp);
#endif /* _RTE_POWER_INTRINSIC_H_ */
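
A minimal sketch of the intended locking pattern for the sync variant
(the wakeup flag, timeout_cycles and the surrounding loop are
illustrative assumptions, not part of this patch):

    rte_spinlock_t lock = RTE_SPINLOCK_INITIALIZER;
    volatile uint32_t wakeup; /* written by another core under the lock */

    rte_spinlock_lock(&lock);
    while (wakeup == 0) {
        /* the lock is dropped only while the CPU sleeps, and is
         * re-taken before the condition is re-checked
         */
        rte_power_monitor_sync(&wakeup, 1, UINT64_MAX,
                rte_rdtsc() + timeout_cycles, sizeof(wakeup), &lock);
    }
    rte_spinlock_unlock(&lock);

rte_power_pause() takes no monitored address and simply sleeps until the
given TSC deadline, e.g. rte_power_pause(rte_rdtsc() + timeout_cycles).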


@@ -59,6 +59,7 @@ generic_headers = files(
'generic/rte_mcslock.h',
'generic/rte_memcpy.h',
'generic/rte_pause.h',
'generic/rte_power_intrinsics.h',
'generic/rte_prefetch.h',
'generic/rte_rwlock.h',
'generic/rte_spinlock.h',


@@ -10,6 +10,7 @@ arch_headers = files(
'rte_io.h',
'rte_memcpy.h',
'rte_pause.h',
'rte_power_intrinsics.h',
'rte_prefetch.h',
'rte_rwlock.h',
'rte_spinlock.h',


@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#ifndef _RTE_POWER_INTRINSIC_PPC_H_
#define _RTE_POWER_INTRINSIC_PPC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_common.h>
#include "generic/rte_power_intrinsics.h"
/**
* This function is not supported on PPC64.
*/
static inline void
rte_power_monitor(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz)
{
RTE_SET_USED(p);
RTE_SET_USED(expected_value);
RTE_SET_USED(value_mask);
RTE_SET_USED(tsc_timestamp);
RTE_SET_USED(data_sz);
}
/**
* This function is not supported on PPC64.
*/
static inline void
rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz, rte_spinlock_t *lck)
{
RTE_SET_USED(p);
RTE_SET_USED(expected_value);
RTE_SET_USED(value_mask);
RTE_SET_USED(tsc_timestamp);
RTE_SET_USED(lck);
RTE_SET_USED(data_sz);
}
/**
* This function is not supported on PPC64.
*/
static inline void
rte_power_pause(const uint64_t tsc_timestamp)
{
RTE_SET_USED(tsc_timestamp);
}
#ifdef __cplusplus
}
#endif
#endif /* _RTE_POWER_INTRINSIC_PPC_H_ */


@@ -12,8 +12,9 @@ arch_headers = files(
'rte_cycles.h',
'rte_io.h',
'rte_memcpy.h',
'rte_prefetch.h',
'rte_pause.h',
'rte_power_intrinsics.h',
'rte_prefetch.h',
'rte_rtm.h',
'rte_rwlock.h',
'rte_spinlock.h',


@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#ifndef _RTE_POWER_INTRINSIC_X86_H_
#define _RTE_POWER_INTRINSIC_X86_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_common.h>
#include "generic/rte_power_intrinsics.h"
static inline uint64_t
__rte_power_get_umwait_val(const volatile void *p, const uint8_t sz)
{
switch (sz) {
case sizeof(uint8_t):
return *(const volatile uint8_t *)p;
case sizeof(uint16_t):
return *(const volatile uint16_t *)p;
case sizeof(uint32_t):
return *(const volatile uint32_t *)p;
case sizeof(uint64_t):
return *(const volatile uint64_t *)p;
default:
/* this is an intrinsic, so we can't have any error handling */
RTE_ASSERT(0);
return 0;
}
}
/**
* This function uses the UMONITOR/UMWAIT instructions and will enter the
* C0.2 state. For more information about the usage of these instructions,
* please refer to the Intel(R) 64 and IA-32 Architectures Software
* Developer's Manual.
*/
static inline void
rte_power_monitor(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz)
{
const uint32_t tsc_l = (uint32_t)tsc_timestamp;
const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
/*
* we're using raw byte codes for now as only the newest compiler
* versions support this instruction natively.
*/
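/*
* For reference: the raw bytes below encode UMONITOR %edi (f3 0f ae /6)
* and UMWAIT %edi (f2 0f ae /6); compilers with WAITPKG support expose
* equivalent _umonitor() and _umwait() intrinsics in <immintrin.h>.
*/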
/* set address for UMONITOR */
asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
:
: "D"(p));
if (value_mask) {
const uint64_t cur_value = __rte_power_get_umwait_val(p, data_sz);
const uint64_t masked = cur_value & value_mask;
/* if the masked value is already matching, abort */
if (masked == expected_value)
return;
}
/* execute UMWAIT */
asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
: /* ignore rflags */
: "D"(0), /* enter C0.2 */
"a"(tsc_l), "d"(tsc_h));
}
/**
* This function uses the UMONITOR/UMWAIT instructions and will enter the
* C0.2 state. For more information about the usage of these instructions,
* please refer to the Intel(R) 64 and IA-32 Architectures Software
* Developer's Manual.
*/
static inline void
rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
const uint64_t value_mask, const uint64_t tsc_timestamp,
const uint8_t data_sz, rte_spinlock_t *lck)
{
const uint32_t tsc_l = (uint32_t)tsc_timestamp;
const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
/*
* we're using raw byte codes for now as only the newest compiler
* versions support this instruction natively.
*/
/* set address for UMONITOR */
asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
:
: "D"(p));
if (value_mask) {
const uint64_t cur_value = __rte_power_get_umwait_val(p, data_sz);
const uint64_t masked = cur_value & value_mask;
/* if the masked value is already matching, abort */
if (masked == expected_value)
return;
}
rte_spinlock_unlock(lck);
/* execute UMWAIT */
asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
: /* ignore rflags */
: "D"(0), /* enter C0.2 */
"a"(tsc_l), "d"(tsc_h));
rte_spinlock_lock(lck);
}
/**
* This function uses the TPAUSE instruction and will enter the C0.2 state.
* For more information about the usage of this instruction, please refer to
* the Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
*/
static inline void
rte_power_pause(const uint64_t tsc_timestamp)
{
const uint32_t tsc_l = (uint32_t)tsc_timestamp;
const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
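/*
* For reference: the raw bytes below encode TPAUSE %edi (66 0f ae /6);
* compilers with WAITPKG support expose an equivalent _tpause()
* intrinsic in <immintrin.h>.
*/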
/* execute TPAUSE */
asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;"
: /* ignore rflags */
: "D"(0), /* enter C0.2 */
"a"(tsc_l), "d"(tsc_h));
}
#ifdef __cplusplus
}
#endif
#endif /* _RTE_POWER_INTRINSIC_X86_H_ */