numam-dpdk/app/test/test_atomic.c
David Christensen 17a042376b test/atomic: fix 128-bit atomic test with many cores
When checking the results of the rte_atomic128_cmp_exchange() function,
current code compares the values of a uint32_t and a uint64_t variable.
If the number of lcores used by the test is large, or the value of the
iteration count N is increased, the variable size mismatch can cause a
false test failure.  Modify the comparison to compare uint64_t values.

Fixes: fa3253c534b1 ("test/atomic: add 128-bit atomic compare exchange test")
Cc: stable@dpdk.org

Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
Tested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
2021-10-14 17:20:49 +02:00

635 lines
17 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
* Copyright(c) 2019 Arm Limited
*/
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_random.h>
#include <rte_hash_crc.h>
#include "test.h"
/*
* Atomic Variables
* ================
*
* - The main test function performs several subtests. The first
* checks that the usual inc/dec/add/sub functions are working
* correctly:
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific
* values.
*
* - These variables are incremented and decremented on each core at
* the same time in ``test_atomic_usual()``.
*
* - The function checks that once all lcores finish their function,
* the values of the atomic variables are still the same.
*
* - Test "test and set" functions.
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
*
* - Invoke ``test_atomic_tas()`` on each lcore. Before doing anything
* else, the cores wait for a synchro using ``while
* (rte_atomic32_read(&val) == 0)`` which is triggered by the main test
* function. Then all cores do a
* ``rte_atomicXX_test_and_set()`` at the same time. If it is successful,
* it increments another atomic counter.
*
* - The main function checks that the atomic counter was incremented
* three times only (one for 16-bit, one for 32-bit and one for 64-bit values).
*
* - Test "add/sub and return" functions
*
* - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
*
* - Invoke ``test_atomic_addsub_return()`` on each lcore. Before doing
* anything else, the cores are waiting a synchro. Each lcore does
* this operation several times::
*
* tmp = rte_atomicXX_add_return(&a, 1);
* atomic_add(&count, tmp);
* tmp = rte_atomicXX_sub_return(&a, 1);
* atomic_sub(&count, tmp+1);
*
* - At the end of the test, the *count* value must be 0.
*
* - Test "128-bit compare and swap" (aarch64 and x86_64 only)
*
* - Initialize 128-bit atomic variables to zero.
*
* - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing
* anything else, the cores are waiting a synchro. Each lcore does
* these compare and swap (CAS) operations several times::
*
* Acquired CAS update counter.val[0] + 2; counter.val[1] + 1;
* Released CAS update counter.val[0] + 2; counter.val[1] + 1;
* Acquired_Released CAS update counter.val[0] + 2; counter.val[1] + 1;
* Relaxed CAS update counter.val[0] + 2; counter.val[1] + 1;
*
* - At the end of the test, the *count128* first 64-bit value and
* second 64-bit value differ by the total iterations.
*
* - Test "atomic exchange" functions
*
* - Create a 64 bit token that can be tested for data integrity
*
* - Invoke ``test_atomic_exchange`` on each lcore. Before doing
* anything else, the cores wait for a synchronization event.
* Each core then does the following for N iterations:
*
* Generate a new token with a data integrity check
* Exchange the new token for previously generated token
* Increment a counter if a corrupt token was received
*
* - At the end of the test, the number of corrupted tokens must be 0.
*/
/*
 * Number of atomic widths exercised (16-, 32- and 64-bit). test_atomic()
 * compares the shared success counter against this value.
 */
#define NUM_ATOMIC_TYPES 3
/* Loop count used by the per-lcore worker functions. */
#define N 1000000
/* Shared 16/32/64-bit atomic variables that all worker lcores operate on. */
static rte_atomic16_t a16;
static rte_atomic32_t a32;
static rte_atomic64_t a64;
/* Shared result counter: updated by the workers, checked by test_atomic(). */
static rte_atomic64_t count;
/* Start flag: workers spin while it reads 0; test_atomic() sets it to 1. */
static rte_atomic32_t synchro;
/*
 * Worker for the "usual inc/dec/add/sub" subtest.
 *
 * Spins until the start flag is raised, then applies balanced pairs of
 * operations to each shared variable: N increments followed by N decrements,
 * then N/5 additions of 5 followed by N/5 subtractions of 5. The net change
 * contributed by this lcore is therefore zero. Always returns 0.
 */
static int
test_atomic_usual(__rte_unused void *arg)
{
	unsigned int left;

	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	/* 16-bit variable */
	for (left = N; left > 0; left--)
		rte_atomic16_inc(&a16);
	for (left = N; left > 0; left--)
		rte_atomic16_dec(&a16);
	for (left = N / 5; left > 0; left--)
		rte_atomic16_add(&a16, 5);
	for (left = N / 5; left > 0; left--)
		rte_atomic16_sub(&a16, 5);

	/* 32-bit variable */
	for (left = N; left > 0; left--)
		rte_atomic32_inc(&a32);
	for (left = N; left > 0; left--)
		rte_atomic32_dec(&a32);
	for (left = N / 5; left > 0; left--)
		rte_atomic32_add(&a32, 5);
	for (left = N / 5; left > 0; left--)
		rte_atomic32_sub(&a32, 5);

	/* 64-bit variable */
	for (left = N; left > 0; left--)
		rte_atomic64_inc(&a64);
	for (left = N; left > 0; left--)
		rte_atomic64_dec(&a64);
	for (left = N / 5; left > 0; left--)
		rte_atomic64_add(&a64, 5);
	for (left = N / 5; left > 0; left--)
		rte_atomic64_sub(&a64, 5);

	return 0;
}
/*
 * Worker for the "test and set" subtest.
 *
 * After the start flag is raised, attempts a test-and-set on each of the
 * 16-, 32- and 64-bit shared variables and bumps the shared counter once for
 * every attempt that reports success. Always returns 0.
 */
static int
test_atomic_tas(__rte_unused void *arg)
{
	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_test_and_set(&a16) != 0) {
		rte_atomic64_inc(&count);
	}

	if (rte_atomic32_test_and_set(&a32) != 0) {
		rte_atomic64_inc(&count);
	}

	if (rte_atomic64_test_and_set(&a64) != 0) {
		rte_atomic64_inc(&count);
	}

	return 0;
}
/*
 * Worker for the "add/sub and return" subtest.
 *
 * For each width, N times over: add 1 and fold the returned value into the
 * shared counter, then subtract 1 and remove (returned value + 1) from the
 * shared counter. test_atomic() expects the counter to end at 0.
 * Always returns 0.
 */
static int
test_atomic_addsub_and_return(__rte_unused void *arg)
{
	unsigned int round;

	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	for (round = 0; round < N; round++) {
		uint32_t ret16;
		uint32_t ret32;
		uint64_t ret64;

		/* 16-bit pair */
		ret16 = rte_atomic16_add_return(&a16, 1);
		rte_atomic64_add(&count, ret16);
		ret16 = rte_atomic16_sub_return(&a16, 1);
		rte_atomic64_sub(&count, ret16 + 1);

		/* 32-bit pair */
		ret32 = rte_atomic32_add_return(&a32, 1);
		rte_atomic64_add(&count, ret32);
		ret32 = rte_atomic32_sub_return(&a32, 1);
		rte_atomic64_sub(&count, ret32 + 1);

		/* 64-bit pair */
		ret64 = rte_atomic64_add_return(&a64, 1);
		rte_atomic64_add(&count, ret64);
		ret64 = rte_atomic64_sub_return(&a64, 1);
		rte_atomic64_sub(&count, ret64 + 1);
	}

	return 0;
}
/*
 * Worker for the "inc and test" subtest.
 *
 * rte_atomicXX_inc_and_test() increments a counter by one and reports
 * whether the result is 0. This worker performs that operation once on each
 * of the 16-, 32- and 64-bit shared variables, and bumps the shared "count"
 * once for every call that reports a zero result. The total is verified by
 * test_atomic(). Always returns 0.
 */
static int
test_atomic_inc_and_test(__rte_unused void *arg)
{
	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_inc_and_test(&a16) != 0)
		rte_atomic64_inc(&count);

	if (rte_atomic32_inc_and_test(&a32) != 0)
		rte_atomic64_inc(&count);

	if (rte_atomic64_inc_and_test(&a64) != 0)
		rte_atomic64_inc(&count);

	return 0;
}
/*
 * Worker for the "dec and test" subtest.
 *
 * rte_atomicXX_dec_and_test() decrements a counter by one and reports
 * whether the result is 0. This worker performs that operation once on each
 * of the 16-, 32- and 64-bit shared variables, and bumps the shared "count"
 * once for every call that reports a zero result. The total is verified by
 * test_atomic(). Always returns 0.
 */
static int
test_atomic_dec_and_test(__rte_unused void *arg)
{
	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_dec_and_test(&a16) != 0) {
		rte_atomic64_inc(&count);
	}

	if (rte_atomic32_dec_and_test(&a32) != 0) {
		rte_atomic64_inc(&count);
	}

	if (rte_atomic64_dec_and_test(&a64) != 0) {
		rte_atomic64_inc(&count);
	}

	return 0;
}
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
/* Shared 128-bit counter updated by every worker via compare-and-swap. */
static rte_int128_t count128;

/*
 * Perform one successful 128-bit compare-and-swap on count128 that advances
 * the first 64-bit half by 2 and the second half by 1.
 *
 * seen:          the caller's current guess of count128; the proposed value
 *                is recomputed from it before every attempt.
 * success_order: memory order passed for the success case; the failure
 *                order is always __ATOMIC_RELAXED.
 *
 * NOTE(review): the retry loop presumes rte_atomic128_cmp_exchange() refreshes
 * *seen with the observed value when it fails - confirm against rte_atomic.h.
 */
static inline void
count128_cas_bump(rte_int128_t *seen, const int success_order)
{
	rte_int128_t next;

	do {
		next.val[0] = seen->val[0] + 2;
		next.val[1] = seen->val[1] + 1;
	} while (rte_atomic128_cmp_exchange(&count128, seen, &next, 1,
			success_order, __ATOMIC_RELAXED) == 0);
}

/*
 * Worker for the 128-bit compare-and-swap subtest.
 *
 * Runs N rounds; each round completes four successful CAS updates of
 * count128, one per success memory order (acquire, release, acq_rel,
 * relaxed). Every update adds 2 to the first 64 bits and 1 to the second
 * 64 bits. Always returns 0.
 */
static int
test_atomic128_cmp_exchange(__rte_unused void *arg)
{
	rte_int128_t snapshot;
	unsigned int round;

	/* Hold here until the main lcore releases all workers together. */
	while (rte_atomic32_read(&synchro) == 0)
		;

	/* Plain copy used as the first guess for the CAS loops. */
	snapshot = count128;

	for (round = 0; round < N; round++) {
		count128_cas_bump(&snapshot, __ATOMIC_ACQUIRE);
		count128_cas_bump(&snapshot, __ATOMIC_RELEASE);
		count128_cas_bump(&snapshot, __ATOMIC_ACQ_REL);
		count128_cas_bump(&snapshot, __ATOMIC_RELAXED);
	}

	return 0;
}
#endif /* RTE_ARCH_X86_64 || RTE_ARCH_ARM64 */
/*
 * Helper definitions/variables/functions for
 * atomic exchange tests
 */

/* 16-bit token viewed as a whole word or as individual bytes. */
typedef union {
	uint16_t u16;
	uint8_t u8[2];
} test16_t;

/* 32-bit token viewed as a word, half-words or individual bytes. */
typedef union {
	uint32_t u32;
	uint16_t u16[2];
	uint8_t u8[4];
} test32_t;

/* 64-bit token viewed as a word, 32/16-bit pieces or individual bytes. */
typedef union {
	uint64_t u64;
	uint32_t u32[2];
	uint16_t u16[4];
	uint8_t u8[8];
} test64_t;

/*
 * All of the objects below are private to this file, so they get internal
 * linkage like the other file-scope test variables; this keeps generic names
 * such as crc8_table from clashing with other objects linked into the test
 * binary.
 */

/* Polynomial XORed in by build_crc8_table(). */
static const uint8_t CRC8_POLY = 0x91;
/* Byte-indexed lookup table filled by build_crc8_table(). */
static uint8_t crc8_table[256];
/* Shared tokens swapped by the workers in test_atomic_exchange(). */
static volatile uint16_t token16;
static volatile uint32_t token32;
static volatile uint64_t token64;
/*
 * Fill crc8_table[] with one entry per possible byte value.
 *
 * Each entry is obtained by running the byte through eight steps of
 * "XOR with CRC8_POLY if the low bit is set, then shift right by one".
 */
static void
build_crc8_table(void)
{
	unsigned int byte;

	for (byte = 0; byte < 256; byte++) {
		uint8_t rem = (uint8_t)byte;
		unsigned int step;

		for (step = 8; step > 0; step--) {
			if ((rem & 1) != 0)
				rem ^= CRC8_POLY;
			rem >>= 1;
		}

		crc8_table[byte] = rem;
	}
}
/*
 * Compute the table-driven CRC8 of a byte buffer.
 *
 * message: bytes to checksum; only read, hence const-qualified.
 * length:  number of bytes to process; a value <= 0 processes nothing.
 *
 * Returns the checksum, starting from 0 and folding in one byte at a time
 * through crc8_table[] (which must have been filled by build_crc8_table()).
 */
static uint8_t
get_crc8(const uint8_t *message, int length)
{
	uint8_t crc = 0;
	int i;

	for (i = 0; i < length; i++)
		crc = crc8_table[crc ^ message[i]];

	return crc;
}
/*
 * Worker for the atomic exchange subtest.
 *
 * A token lives in shared memory for each width (16, 32 and 64 bits). Every
 * worker repeatedly builds a fresh token, atomically swaps it with the one
 * currently in memory, and verifies that the token it received is intact.
 *
 * A token is random data plus a CRC8 of that data stored in the last byte
 * of the union's byte view. Example for the 64-bit token, in byte-array
 * order:
 *
 * +------------+------------+
 * |   u8[7]    | u8[6..0]   |
 * +------------+------------+
 * |   CRC8     |   Data     |
 * +------------+------------+
 *
 * Each token whose stored CRC8 does not match its data bumps the shared
 * "count". Always returns 0.
 */
static int
test_atomic_exchange(__rte_unused void *arg)
{
	test64_t fresh64, prev64;	/* token to install, token received */
	test32_t fresh32, prev32;
	test16_t fresh16, prev16;
	int round;

	/* Wait until all of the other threads have been dispatched */
	while (rte_atomic32_read(&synchro) == 0)
		;

	/*
	 * Every thread tries to take the current token with an atomic
	 * exchange while installing its own newly generated one. A received
	 * token passes if its last byte equals the CRC8 of the preceding
	 * bytes; otherwise the failure counter is incremented.
	 */
	for (round = 0; round < N; round++) {
		/* 64-bit exchange */
		fresh64.u64 = rte_rand();
		fresh64.u8[7] = get_crc8(&fresh64.u8[0], sizeof(fresh64) - 1);
		prev64.u64 = rte_atomic64_exchange(&token64, fresh64.u64);
		if (get_crc8(&prev64.u8[0], sizeof(prev64) - 1) != prev64.u8[7])
			rte_atomic64_inc(&count);

		/* 32-bit exchange */
		fresh32.u32 = (uint32_t)rte_rand();
		fresh32.u8[3] = get_crc8(&fresh32.u8[0], sizeof(fresh32) - 1);
		prev32.u32 = rte_atomic32_exchange(&token32, fresh32.u32);
		if (get_crc8(&prev32.u8[0], sizeof(prev32) - 1) != prev32.u8[3])
			rte_atomic64_inc(&count);

		/* 16-bit exchange */
		fresh16.u16 = (uint16_t)rte_rand();
		fresh16.u8[1] = get_crc8(&fresh16.u8[0], sizeof(fresh16) - 1);
		prev16.u16 = rte_atomic16_exchange(&token16, fresh16.u16);
		if (get_crc8(&prev16.u8[0], sizeof(prev16) - 1) != prev16.u8[1])
			rte_atomic64_inc(&count);
	}

	return 0;
}
static int
test_atomic(void)
{
rte_atomic16_init(&a16);
rte_atomic32_init(&a32);
rte_atomic64_init(&a64);
rte_atomic64_init(&count);
rte_atomic32_init(&synchro);
rte_atomic16_set(&a16, 1UL << 10);
rte_atomic32_set(&a32, 1UL << 10);
rte_atomic64_set(&a64, 1ULL << 33);
printf("usual inc/dec/add/sub functions\n");
rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic16_read(&a16) != 1UL << 10) {
printf("Atomic16 usual functions failed\n");
return -1;
}
if (rte_atomic32_read(&a32) != 1UL << 10) {
printf("Atomic32 usual functions failed\n");
return -1;
}
if (rte_atomic64_read(&a64) != 1ULL << 33) {
printf("Atomic64 usual functions failed\n");
return -1;
}
printf("test and set\n");
rte_atomic64_set(&a64, 0);
rte_atomic32_set(&a32, 0);
rte_atomic16_set(&a16, 0);
rte_atomic64_set(&count, 0);
rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic test and set failed\n");
return -1;
}
printf("add/sub and return\n");
rte_atomic64_set(&a64, 0);
rte_atomic32_set(&a32, 0);
rte_atomic16_set(&a16, 0);
rte_atomic64_set(&count, 0);
rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL,
SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_set(&synchro, 0);
if (rte_atomic64_read(&count) != 0) {
printf("Atomic add/sub+return failed\n");
return -1;
}
/*
* Set a64, a32 and a16 with the same value of minus "number of worker
* lcores", launch all worker lcores to atomically increase by one and
* test them respectively.
* Each lcore should have only one chance to increase a64 by one and
* then check if it is equal to 0, but there should be only one lcore
* that finds that it is 0. It is similar for a32 and a16.
* Then a variable of "count", initialized to zero, is increased by
* one if a64, a32 or a16 is 0 after being increased and tested
* atomically.
* We can check if "count" is finally equal to 3 to see if all worker
* lcores performed "atomic inc and test" right.
*/
printf("inc and test\n");
rte_atomic64_clear(&a64);
rte_atomic32_clear(&a32);
rte_atomic16_clear(&a16);
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count()));
rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count()));
rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count()));
rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic inc and test failed %d\n", (int)count.cnt);
return -1;
}
/*
* Same as above, but this time we set the values to "number of worker
* lcores", and decrement instead of increment.
*/
printf("dec and test\n");
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1));
rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1));
rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1));
rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
printf("Atomic dec and test failed\n");
return -1;
}
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
/*
* This case tests the functionality of rte_atomic128_cmp_exchange
* API. It calls rte_atomic128_cmp_exchange with four kinds of memory
* models successively on each worker core. Once each 128-bit atomic
* compare and swap operation is successful, it updates the global
* 128-bit counter by 2 for the first 64-bit and 1 for the second
* 64-bit. Each worker core iterates this test N times.
* At the end of test, verify whether the first 64-bits of the 128-bit
* counter and the second 64bits is differ by the total iterations. If
* it is, the test passes.
*/
printf("128-bit compare and swap test\n");
uint64_t iterations = 0;
rte_atomic32_clear(&synchro);
count128.val[0] = 0;
count128.val[1] = 0;
rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL,
SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
iterations = count128.val[0] - count128.val[1];
if (iterations != (uint64_t)4*N*(rte_lcore_count()-1)) {
printf("128-bit compare and swap failed\n");
return -1;
}
#endif
/*
* Test 16/32/64bit atomic exchange.
*/
test64_t t;
printf("exchange test\n");
rte_atomic32_clear(&synchro);
rte_atomic64_clear(&count);
/* Generate the CRC8 lookup table */
build_crc8_table();
/* Create the initial tokens used by the test */
t.u64 = rte_rand();
token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8)
| (t.u16[0] & 0x00ff);
token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24)
| (t.u32[0] & 0x00ffffff);
token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56)
| (t.u64 & 0x00ffffffffffffff);
rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MAIN);
rte_atomic32_set(&synchro, 1);
rte_eal_mp_wait_lcore();
rte_atomic32_clear(&synchro);
if (rte_atomic64_read(&count) > 0) {
printf("Atomic exchange test failed\n");
return -1;
}
return 0;
}
/*
 * Associates test_atomic() with the name "atomic_autotest".
 * NOTE(review): the macro comes from test.h; presumably it registers the
 * function as a command of the test application - confirm there.
 */
REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);