numam-dpdk/app/test/test_atomic.c
Stephen Hemminger cb056611a8 eal: rename lcore master and slave
Replace master lcore with main lcore and
replace slave lcore with worker lcore.

Keep the old functions and macros but mark them as deprecated
for this release.

The "--master-lcore" command line option is also deprecated
and any usage will print a warning and use "--main-lcore"
as replacement.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
2020-10-20 13:17:08 +02:00

635 lines
17 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
* Copyright(c) 2019 Arm Limited
*/
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_random.h>
#include <rte_hash_crc.h>
#include "test.h"
/*
* Atomic Variables
* ================
*
* - The main test function performs several subtests. The first
* checks that the usual inc/dec/add/sub functions are working
* correctly:
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific
* values.
*
* - These variables are incremented and decremented on each core at
* the same time in ``test_atomic_usual()``.
*
* - The function checks that once all lcores finish their function,
* the value of the atomic variables are still the same.
*
* - Test "test and set" functions.
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
*
* - Invoke ``test_atomic_tas()`` on each lcore: before doing anything
* else. The cores are waiting a synchro using ``while
* (rte_atomic32_read(&val) == 0)`` which is triggered by the main test
* function. Then all cores do a
* ``rte_atomicXX_test_and_set()`` at the same time. If it is successful,
* it increments another atomic counter.
*
* - The main function checks that the atomic counter was incremented
* twice only (one for 16-bit, one for 32-bit and one for 64-bit values).
*
* - Test "add/sub and return" functions
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
*
* - Invoke ``test_atomic_addsub_return()`` on each lcore. Before doing
* anything else, the cores are waiting a synchro. Each lcore does
* this operation several times::
*
* tmp = rte_atomicXX_add_return(&a, 1);
* atomic_add(&count, tmp);
* tmp = rte_atomicXX_sub_return(&a, 1);
* atomic_sub(&count, tmp+1);
*
* - At the end of the test, the *count* value must be 0.
*
* - Test "128-bit compare and swap" (aarch64 and x86_64 only)
*
* - Initialize 128-bit atomic variables to zero.
*
* - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing
* anything else, the cores are waiting a synchro. Each lcore does
* these compare and swap (CAS) operations several times::
*
* Acquired CAS update counter.val[0] + 2; counter.val[1] + 1;
* Released CAS update counter.val[0] + 2; counter.val[1] + 1;
* Acquired_Released CAS update counter.val[0] + 2; counter.val[1] + 1;
* Relaxed CAS update counter.val[0] + 2; counter.val[1] + 1;
*
* - At the end of the test, the *count128* first 64-bit value and
* second 64-bit value differ by the total iterations.
*
* - Test "atomic exchange" functions
*
* - Create a 64 bit token that can be tested for data integrity
*
* - Invoke ``test_atomic_exchange`` on each lcore. Before doing
* anything else, the cores wait for a synchronization event.
* Each core then does the follwoing for N iterations:
*
* Generate a new token with a data integrity check
* Exchange the new token for previously generated token
* Increment a counter if a corrupt token was received
*
* - At the end of the test, the number of corrupted tokens must be 0.
*/
#define NUM_ATOMIC_TYPES 3
#define N 1000000
static rte_atomic16_t a16;
static rte_atomic32_t a32;
static rte_atomic64_t a64;
static rte_atomic64_t count;
static rte_atomic32_t synchro;
static int
test_atomic_usual(__rte_unused void *arg)
{
unsigned i;
while (rte_atomic32_read(&synchro) == 0)
;
for (i = 0; i < N; i++)
rte_atomic16_inc(&a16);
for (i = 0; i < N; i++)
rte_atomic16_dec(&a16);
for (i = 0; i < (N / 5); i++)
rte_atomic16_add(&a16, 5);
for (i = 0; i < (N / 5); i++)
rte_atomic16_sub(&a16, 5);
for (i = 0; i < N; i++)
rte_atomic32_inc(&a32);
for (i = 0; i < N; i++)
rte_atomic32_dec(&a32);
for (i = 0; i < (N / 5); i++)
rte_atomic32_add(&a32, 5);
for (i = 0; i < (N / 5); i++)
rte_atomic32_sub(&a32, 5);
for (i = 0; i < N; i++)
rte_atomic64_inc(&a64);
for (i = 0; i < N; i++)
rte_atomic64_dec(&a64);
for (i = 0; i < (N / 5); i++)
rte_atomic64_add(&a64, 5);
for (i = 0; i < (N / 5); i++)
rte_atomic64_sub(&a64, 5);
return 0;
}
static int
test_atomic_tas(__rte_unused void *arg)
{
while (rte_atomic32_read(&synchro) == 0)
;
if (rte_atomic16_test_and_set(&a16))
rte_atomic64_inc(&count);
if (rte_atomic32_test_and_set(&a32))
rte_atomic64_inc(&count);
if (rte_atomic64_test_and_set(&a64))
rte_atomic64_inc(&count);
return 0;
}
static int
test_atomic_addsub_and_return(__rte_unused void *arg)
{
uint32_t tmp16;
uint32_t tmp32;
uint64_t tmp64;
unsigned i;
while (rte_atomic32_read(&synchro) == 0)
;
for (i = 0; i < N; i++) {
tmp16 = rte_atomic16_add_return(&a16, 1);
rte_atomic64_add(&count, tmp16);
tmp16 = rte_atomic16_sub_return(&a16, 1);
rte_atomic64_sub(&count, tmp16+1);
tmp32 = rte_atomic32_add_return(&a32, 1);
rte_atomic64_add(&count, tmp32);
tmp32 = rte_atomic32_sub_return(&a32, 1);
rte_atomic64_sub(&count, tmp32+1);
tmp64 = rte_atomic64_add_return(&a64, 1);
rte_atomic64_add(&count, tmp64);
tmp64 = rte_atomic64_sub_return(&a64, 1);
rte_atomic64_sub(&count, tmp64+1);
}
return 0;
}
/*
* rte_atomic32_inc_and_test() would increase a 32 bits counter by one and then
* test if that counter is equal to 0. It would return true if the counter is 0
* and false if the counter is not 0. rte_atomic64_inc_and_test() could do the
* same thing but for a 64 bits counter.
* Here checks that if the 32/64 bits counter is equal to 0 after being atomically
* increased by one. If it is, increase the variable of "count" by one which would
* be checked as the result later.
*
*/
static int
test_atomic_inc_and_test(__rte_unused void *arg)
{
while (rte_atomic32_read(&synchro) == 0)
;
if (rte_atomic16_inc_and_test(&a16)) {
rte_atomic64_inc(&count);
}
if (rte_atomic32_inc_and_test(&a32)) {
rte_atomic64_inc(&count);
}
if (rte_atomic64_inc_and_test(&a64)) {
rte_atomic64_inc(&count);
}
return 0;
}
/*
* rte_atomicXX_dec_and_test() should decrease a 32 bits counter by one and then
* test if that counter is equal to 0. It should return true if the counter is 0
* and false if the counter is not 0.
* This test checks if the counter is equal to 0 after being atomically
* decreased by one. If it is, increase the value of "count" by one which is to
* be checked as the result later.
*/
static int
test_atomic_dec_and_test(__rte_unused void *arg)
{
while (rte_atomic32_read(&synchro) == 0)
;
if (rte_atomic16_dec_and_test(&a16))
rte_atomic64_inc(&count);
if (rte_atomic32_dec_and_test(&a32))
rte_atomic64_inc(&count);
if (rte_atomic64_dec_and_test(&a64))
rte_atomic64_inc(&count);
return 0;
}
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
static rte_int128_t count128;
/*
* rte_atomic128_cmp_exchange() should update a 128 bits counter's first 64
* bits by 2 and the second 64 bits by 1 in this test. It should return true
* if the compare exchange operation is successful.
* This test repeats 128 bits compare and swap operations N rounds. In each
* iteration it runs compare and swap operation with different memory models.
*/
static int
test_atomic128_cmp_exchange(__rte_unused void *arg)
{
rte_int128_t expected;
int success;
unsigned int i;
while (rte_atomic32_read(&synchro) == 0)
;
expected = count128;
for (i = 0; i < N; i++) {
do {
rte_int128_t desired;
desired.val[0] = expected.val[0] + 2;
desired.val[1] = expected.val[1] + 1;
success = rte_atomic128_cmp_exchange(&count128,
&expected, &desired, 1,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} while (success == 0);
do {
rte_int128_t desired;
desired.val[0] = expected.val[0] + 2;
desired.val[1] = expected.val[1] + 1;
success = rte_atomic128_cmp_exchange(&count128,
&expected, &desired, 1,
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
} while (success == 0);
do {
rte_int128_t desired;
desired.val[0] = expected.val[0] + 2;
desired.val[1] = expected.val[1] + 1;
success = rte_atomic128_cmp_exchange(&count128,
&expected, &desired, 1,
__ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
} while (success == 0);
do {
rte_int128_t desired;
desired.val[0] = expected.val[0] + 2;
desired.val[1] = expected.val[1] + 1;
success = rte_atomic128_cmp_exchange(&count128,
&expected, &desired, 1,
__ATOMIC_RELAXED, __ATOMIC_RELAXED);
} while (success == 0);
}
return 0;
}
#endif
/*
* Helper definitions/variables/functions for
* atomic exchange tests
*/
typedef union {
uint16_t u16;
uint8_t u8[2];
} test16_t;
typedef union {
uint32_t u32;
uint16_t u16[2];
uint8_t u8[4];
} test32_t;
typedef union {
uint64_t u64;
uint32_t u32[2];
uint16_t u16[4];
uint8_t u8[8];
} test64_t;
const uint8_t CRC8_POLY = 0x91;
uint8_t crc8_table[256];
volatile uint16_t token16;
volatile uint32_t token32;
volatile uint64_t token64;
static void
build_crc8_table(void)
{
uint8_t val;
int i, j;
for (i = 0; i < 256; i++) {
val = i;
for (j = 0; j < 8; j++) {
if (val & 1)
val ^= CRC8_POLY;
val >>= 1;
}
crc8_table[i] = val;
}
}
static uint8_t
get_crc8(uint8_t *message, int length)
{
uint8_t crc = 0;
int i;
for (i = 0; i < length; i++)
crc = crc8_table[crc ^ message[i]];
return crc;
}
/*
* The atomic exchange test sets up a token in memory and
* then spins up multiple lcores whose job is to generate
* new tokens, exchange that new token for the old one held
* in memory, and then verify that the old token is still
* valid (i.e. the exchange did not corrupt the token).
*
* A token is made up of random data and 8 bits of crc
* covering that random data. The following is an example
* of a 64bit token.
*
* +------------+------------+
* | 63 56 | 55 0 |
* +------------+------------+
* | CRC8 | Data |
* +------------+------------+
*/
static int
test_atomic_exchange(__rte_unused void *arg)
{
int i;
test16_t nt16, ot16; /* new token, old token */
test32_t nt32, ot32;
test64_t nt64, ot64;
/* Wait until all of the other threads have been dispatched */
while (rte_atomic32_read(&synchro) == 0)
;
/*
* Let the battle begin! Every thread attempts to steal the current
* token with an atomic exchange operation and install its own newly
* generated token. If the old token is valid (i.e. it has the
* appropriate crc32 hash for the data) then the test iteration has
* passed. If the token is invalid, increment the counter.
*/
for (i = 0; i < N; i++) {
/* Test 64bit Atomic Exchange */
nt64.u64 = rte_rand();
nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1);
ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64);
if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1))
rte_atomic64_inc(&count);
/* Test 32bit Atomic Exchange */
nt32.u32 = (uint32_t)rte_rand();
nt32.u8[3] = get_crc8(&nt32.u8[0], sizeof(nt32) - 1);
ot32.u32 = rte_atomic32_exchange(&token32, nt32.u32);
if (ot32.u8[3] != get_crc8(&ot32.u8[0], sizeof(ot32) - 1))
rte_atomic64_inc(&count);
/* Test 16bit Atomic Exchange */
nt16.u16 = (uint16_t)rte_rand();
nt16.u8[1] = get_crc8(&nt16.u8[0], sizeof(nt16) - 1);
ot16.u16 = rte_atomic16_exchange(&token16, nt16.u16);
if (ot16.u8[1] != get_crc8(&ot16.u8[0], sizeof(ot16) - 1))
rte_atomic64_inc(&count);
}
return 0;
}
static int
test_atomic(void)
{
rte_atomic16_init(&a16);
rte_atomic32_init(&a32);
rte_atomic64_init(&a64);
rte_atomic64_init(&count);
rte_atomic32_init(&synchro);
rte_atomic16_set(&a16, 1UL << 10);
rte_atomic32_set(&a32, 1UL << 10);
rte_atomic64_set(&a64, 1ULL << 33);
printf("usual inc/dec/add/sub functions\n");
rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic16_read(&a16) != 1UL << 10) {
printf("Atomic16 usual functions failed\n");
return -1;
}
if (rte_atomic32_read(&a32) != 1UL << 10) {
printf("Atomic32 usual functions failed\n");
return -1;
}
if (rte_atomic64_read(&a64) != 1ULL << 33) {
printf("Atomic64 usual functions failed\n");
return -1;
}
printf("test and set\n");
rte_atomic64_set(&a64, 0);
rte_atomic32_set(&a32, 0);
rte_atomic16_set(&a16, 0);
rte_atomic64_set(&count, 0);
rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic test and set failed\n");
return -1;
}
printf("add/sub and return\n");
rte_atomic64_set(&a64, 0);
rte_atomic32_set(&a32, 0);
rte_atomic16_set(&a16, 0);
rte_atomic64_set(&count, 0);
rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL,
SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic64_read(&count) != 0) {
printf("Atomic add/sub+return failed\n");
return -1;
}
/*
* Set a64, a32 and a16 with the same value of minus "number of worker
* lcores", launch all worker lcores to atomically increase by one and
* test them respectively.
* Each lcore should have only one chance to increase a64 by one and
* then check if it is equal to 0, but there should be only one lcore
* that finds that it is 0. It is similar for a32 and a16.
* Then a variable of "count", initialized to zero, is increased by
* one if a64, a32 or a16 is 0 after being increased and tested
* atomically.
* We can check if "count" is finally equal to 3 to see if all worker
* lcores performed "atomic inc and test" right.
*/
printf("inc and test\n");
rte_atomic64_clear(&a64);
rte_atomic32_clear(&a32);
rte_atomic16_clear(&a16);
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count()));
rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count()));
rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count()));
rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic inc and test failed %d\n", (int)count.cnt);
return -1;
}
/*
* Same as above, but this time we set the values to "number of worker
* lcores", and decrement instead of increment.
*/
printf("dec and test\n");
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1));
rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1));
rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1));
rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic dec and test failed\n");
return -1;
}
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
/*
* This case tests the functionality of rte_atomic128_cmp_exchange
* API. It calls rte_atomic128_cmp_exchange with four kinds of memory
* models successively on each worker core. Once each 128-bit atomic
* compare and swap operation is successful, it updates the global
* 128-bit counter by 2 for the first 64-bit and 1 for the second
* 64-bit. Each worker core iterates this test N times.
* At the end of test, verify whether the first 64-bits of the 128-bit
* counter and the second 64bits is differ by the total iterations. If
* it is, the test passes.
*/
printf("128-bit compare and swap test\n");
uint64_t iterations = 0;
rte_atomic32_clear(&synchro);
count128.val[0] = 0;
count128.val[1] = 0;
rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL,
SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
iterations = count128.val[0] - count128.val[1];
if (iterations != 4*N*(rte_lcore_count()-1)) {
printf("128-bit compare and swap failed\n");
return -1;
}
#endif
/*
* Test 16/32/64bit atomic exchange.
*/
test64_t t;
printf("exchange test\n");
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
/* Generate the CRC8 lookup table */
build_crc8_table();
/* Create the initial tokens used by the test */
t.u64 = rte_rand();
token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8)
| (t.u16[0] & 0x00ff);
token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24)
| (t.u32[0] & 0x00ffffff);
token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56)
| (t.u64 & 0x00ffffffffffffff);
rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) > 0) {
printf("Atomic exchange test failed\n");
return -1;
}
return 0;
}
REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);