numam-dpdk/app/test/test_rwlock.c
Joyce Kong 6fef1ae4fc test/rwlock: amortize the cost of getting time
Instead of getting a timestamp per iteration, amortizing its
overhead helps to get more precise benchmarking results.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2019-03-28 11:49:36 +01:00

550 lines
12 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/queue.h>
#include <string.h>
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_rwlock.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include "test.h"
/*
* rwlock test
* ===========
* Provides UT for rte_rwlock API.
* Main concern is on functional testing, but also provides some
* performance measurements.
* Obviously, proper testing requires running with more than one lcore.
*/
/* Number of trylock attempts made between two consecutive timer reads. */
#define ITER_NUM 0x80
/* Run time of each trylock test, in seconds (scaled to ticks in try_test_reset()). */
#define TEST_SEC 5
/* Global lock shared by the master and all slaves in rwlock_test1(). */
static rte_rwlock_t sl;
/* One lock per lcore, indexed by lcore id; used in rwlock_test1(). */
static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
/* Start flag of the perf test: slaves spin in load_loop_fn() while it is 0. */
static rte_atomic32_t synchro;
/*
 * Role played by an lcore in the trylock tests; stored in
 * try_rwlock_lcore.type.
 * LC_TYPE_RDLOCK is 0, so an entry zeroed by try_test_reset() reads as a
 * reader until the lcore function overwrites it.
 */
enum {
LC_TYPE_RDLOCK,
LC_TYPE_WRLOCK,
};
/*
 * State shared by all lcores during the trylock tests.
 */
static struct {
/* lock under test; taken with the trylock API in try_read()/try_write() */
rte_rwlock_t lock;
/* test duration in timer ticks, set by try_test_reset() */
uint64_t tick;
/*
 * One cache line of payload. try_write() fills it and restores it to zero
 * while holding the write lock; try_read() and try_write() treat any
 * unexpected value as a locking failure.
 */
volatile union {
uint8_t u8[RTE_CACHE_LINE_SIZE];
uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
} data;
} __rte_cache_aligned try_rwlock_data;
/*
 * Per-lcore result and statistics of one trylock test run.
 */
struct try_rwlock_lcore {
/* final status of the lcore function: 0 or a negative error code */
int32_t rc;
/* LC_TYPE_RDLOCK or LC_TYPE_WRLOCK */
int32_t type;
struct {
/* timer cycles spent in the test loop */
uint64_t tick;
/* number of trylock attempts that returned -EBUSY */
uint64_t fail;
/* number of trylock attempts that acquired the lock and passed the check */
uint64_t success;
} stat;
} __rte_cache_aligned;
/* Results of the trylock tests, indexed by lcore id. */
static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];
static int
test_rwlock_per_core(__attribute__((unused)) void *arg)
{
rte_rwlock_write_lock(&sl);
printf("Global write lock taken on core %u\n", rte_lcore_id());
rte_rwlock_write_unlock(&sl);
rte_rwlock_write_lock(&sl_tab[rte_lcore_id()]);
printf("Hello from core %u !\n", rte_lcore_id());
rte_rwlock_write_unlock(&sl_tab[rte_lcore_id()]);
rte_rwlock_read_lock(&sl);
printf("Global read lock taken on core %u\n", rte_lcore_id());
rte_delay_ms(100);
printf("Release global read lock on core %u\n", rte_lcore_id());
rte_rwlock_read_unlock(&sl);
return 0;
}
/* Lock exercised by the perf test in load_loop_fn(). */
static rte_rwlock_t lk = RTE_RWLOCK_INITIALIZER;
/* Counter incremented under the write lock of 'lk' in load_loop_fn(). */
static volatile uint64_t rwlock_data;
/* Per-lcore run time of load_loop_fn() in microseconds, indexed by lcore id. */
static uint64_t time_count[RTE_MAX_LCORE] = {0};
/* Number of write-lock + read-lock rounds each lcore runs in load_loop_fn(). */
#define MAX_LOOP 10000
/* Non-zero makes load_loop_fn() print rwlock_data every 100 rounds. */
#define TEST_RWLOCK_DEBUG 0
/*
 * Perf-test body run on every lcore.
 *
 * Performs MAX_LOOP rounds of "write-lock, increment rwlock_data, unlock,
 * read-lock, unlock" on 'lk' and stores the elapsed time in microseconds
 * into time_count[] at the index of the calling lcore.
 *
 * Slave lcores first spin until 'synchro' becomes non-zero so that all
 * lcores start at roughly the same time. The timestamp is taken once
 * around the whole loop rather than once per round.
 *
 * The argument is unused. Always returns 0.
 */
static int
load_loop_fn(__attribute__((unused)) void *arg)
{
	uint64_t time_diff = 0, begin;
	/*
	 * The interval below is measured with rte_rdtsc_precise(), i.e. in
	 * TSC cycles, so it has to be converted with the TSC frequency.
	 * rte_get_timer_hz() is the rate of the configured EAL timer source,
	 * which is not necessarily the TSC.
	 */
	uint64_t hz = rte_get_tsc_hz();
	uint64_t lcount = 0;
	const unsigned int lcore = rte_lcore_id();

	/* wait synchro for slaves */
	if (lcore != rte_get_master_lcore())
		while (rte_atomic32_read(&synchro) == 0)
			;

	begin = rte_rdtsc_precise();
	while (lcount < MAX_LOOP) {
		rte_rwlock_write_lock(&lk);
		++rwlock_data;
		rte_rwlock_write_unlock(&lk);

		rte_rwlock_read_lock(&lk);
		if (TEST_RWLOCK_DEBUG && !(lcount % 100))
			printf("Core [%u] rwlock_data = %"PRIu64"\n",
				lcore, rwlock_data);
		rte_rwlock_read_unlock(&lk);

		lcount++;
		/* delay to make lock duty cycle slightly realistic */
		rte_pause();
	}
	time_diff = rte_rdtsc_precise() - begin;
	/* cycles -> microseconds */
	time_count[lcore] = time_diff * 1000000 / hz;
	return 0;
}
/*
 * Run load_loop_fn() on every lcore and report how long each one took.
 *
 * Slaves are launched first and held back by 'synchro'; the flag is then
 * raised and the master runs the same loop itself. After all lcores have
 * finished, the per-lcore times and their sum are printed and time_count[]
 * is cleared.
 *
 * Returns 0 on success, -1 if the slaves could not be launched.
 */
static int
test_rwlock_perf(void)
{
	unsigned int lcore_id;
	uint64_t sum_us;

	printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());

	/* keep the slaves waiting until everybody has been launched */
	rte_atomic32_set(&synchro, 0);
	if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MASTER) < 0)
		return -1;

	/* release the slaves and take part in the test on the master */
	rte_atomic32_set(&synchro, 1);
	load_loop_fn(NULL);

	rte_eal_mp_wait_lcore();

	sum_us = 0;
	RTE_LCORE_FOREACH(lcore_id) {
		printf("Core [%u] cost time = %" PRIu64 " us\n",
			lcore_id, time_count[lcore_id]);
		sum_us += time_count[lcore_id];
	}
	printf("Total cost time = %" PRIu64 " us\n", sum_us);

	/* leave a clean slate for the next run */
	memset(time_count, 0, sizeof(time_count));

	return 0;
}
/*
 * Functional test built on a global rwlock (sl) and a table of rwlocks
 * with one entry per lcore (sl_tab[]).
 *
 * - The master initialises all locks, write-locks the global lock and the
 *   per-lcore lock of every slave, and launches
 *   ``test_rwlock_per_core()`` on each slave.
 *
 * - Each slave takes the global write lock, prints a message and releases
 *   it; then does the same with its own per-lcore write lock; finally it
 *   holds the global read lock for 100 ms.
 *
 * - The master releases the global lock, then unlocks the per-lcore locks
 *   one by one with a 100 ms pause after each, so that the slaves print
 *   their per-lcore message in lcore order.
 *
 * - Last, the master takes the global write lock and prints the final
 *   message. The autotest script checks that the message order is correct.
 *
 * Once all slaves have returned, the perf test is run as well.
 *
 * Returns 0 on success, -1 if the perf test failed.
 */
static int
rwlock_test1(void)
{
	int lcore;

	rte_rwlock_init(&sl);
	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++)
		rte_rwlock_init(&sl_tab[lcore]);

	/* hold every lock while the slaves are being started */
	rte_rwlock_write_lock(&sl);
	RTE_LCORE_FOREACH_SLAVE(lcore) {
		rte_rwlock_write_lock(&sl_tab[lcore]);
		rte_eal_remote_launch(test_rwlock_per_core, NULL, lcore);
	}
	rte_rwlock_write_unlock(&sl);

	/* let the slaves through their per-lcore section one at a time */
	RTE_LCORE_FOREACH_SLAVE(lcore) {
		rte_rwlock_write_unlock(&sl_tab[lcore]);
		rte_delay_ms(100);
	}

	rte_rwlock_write_lock(&sl);
	/* this message should be the last message of test */
	printf("Global write lock taken on master core %u\n", rte_lcore_id());
	rte_rwlock_write_unlock(&sl);

	rte_eal_mp_wait_lcore();

	return test_rwlock_perf() < 0 ? -1 : 0;
}
/*
 * Single read-trylock attempt on try_rwlock_data.lock.
 *
 * If the lock is obtained, every 64-bit word of the shared payload is
 * checked to be zero before the lock is released again.
 *
 * lc is the calling lcore id and is only used in the error message.
 *
 * Returns 0 on success, the non-zero result of rte_rwlock_read_trylock()
 * when the lock was not obtained, or -EFAULT when a non-zero word was seen
 * while holding the read lock.
 */
static int
try_read(uint32_t lc)
{
	int32_t status;
	uint32_t word;

	status = rte_rwlock_read_trylock(&try_rwlock_data.lock);
	if (status != 0)
		return status;

	for (word = 0; word != RTE_DIM(try_rwlock_data.data.u64); word++) {
		if (try_rwlock_data.data.u64[word] == 0)
			continue;

		/* race condition occurred, lock doesn't work properly */
		printf("%s(%u) error: unexpected data pattern\n",
			__func__, lc);
		rte_memdump(stdout, NULL,
			(void *)(uintptr_t)&try_rwlock_data.data,
			sizeof(try_rwlock_data.data));
		status = -EFAULT;
		break;
	}

	rte_rwlock_read_unlock(&try_rwlock_data.lock);
	return status;
}
/*
 * Single write-trylock attempt on try_rwlock_data.lock.
 *
 * If the lock is obtained, the shared payload is filled byte by byte with
 * a non-zero value derived from lc and then reset to zero byte by byte,
 * both passes going from the last byte to the first. Each byte is checked
 * to hold the expected previous value before it is overwritten.
 *
 * lc is the calling lcore id; it selects the fill value and appears in the
 * error messages.
 *
 * Returns 0 on success, the non-zero result of rte_rwlock_write_trylock()
 * when the lock was not obtained, or -EFAULT when an unexpected byte was
 * seen while holding the write lock.
 */
static int
try_write(uint32_t lc)
{
	int32_t status;
	uint32_t pos, fill;

	/* fill value is never 0, so it can be told apart from the idle state */
	fill = RTE_MAX(lc % UINT8_MAX, 1U);

	status = rte_rwlock_write_trylock(&try_rwlock_data.lock);
	if (status != 0)
		return status;

	/* update by bytes in reverse order */
	pos = RTE_DIM(try_rwlock_data.data.u8);
	while (pos-- != 0) {
		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[pos] != 0) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			status = -EFAULT;
			break;
		}
		try_rwlock_data.data.u8[pos] = fill;
	}

	/* restore by bytes in reverse order; runs even if the pass above failed */
	pos = RTE_DIM(try_rwlock_data.data.u8);
	while (pos-- != 0) {
		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[pos] != fill) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			status = -EFAULT;
			break;
		}
		try_rwlock_data.data.u8[pos] = 0;
	}

	rte_rwlock_write_unlock(&try_rwlock_data.lock);
	return status;
}
/*
 * Reader lcore function of the trylock tests.
 *
 * Calls try_read() in batches of ITER_NUM attempts and reads the timer
 * once per batch, until try_rwlock_data.tick timer cycles have elapsed or
 * try_read() reports an error other than -EBUSY. Attempts returning 0 are
 * counted as successes, -EBUSY as failures.
 *
 * The outcome (status, elapsed cycles, counters) is stored in the
 * try_lcore_data[] entry of the calling lcore. The argument is unused.
 *
 * Returns 0, or the error code reported by try_read().
 */
static int
try_read_lcore(__rte_unused void *data)
{
	int32_t rc;
	uint32_t attempt, self;
	uint64_t limit, start, elapsed;
	struct try_rwlock_lcore *res;

	self = rte_lcore_id();
	res = &try_lcore_data[self];
	res->type = LC_TYPE_RDLOCK;

	limit = try_rwlock_data.tick;
	start = rte_get_timer_cycles();

	do {
		for (attempt = 0; attempt != ITER_NUM; attempt++) {
			rc = try_read(self);
			if (rc == 0) {
				res->stat.success++;
			} else if (rc == -EBUSY) {
				res->stat.fail++;
				/* a busy lock is an expected outcome */
				rc = 0;
			} else {
				break;
			}
		}
		elapsed = rte_get_timer_cycles() - start;
	} while (rc == 0 && elapsed < limit);

	res->rc = rc;
	res->stat.tick = elapsed;
	return rc;
}
/*
 * Writer lcore function of the trylock tests.
 *
 * Calls try_write() in batches of ITER_NUM attempts and reads the timer
 * once per batch, until try_rwlock_data.tick timer cycles have elapsed or
 * try_write() reports an error other than -EBUSY. Attempts returning 0 are
 * counted as successes, -EBUSY as failures.
 *
 * The outcome (status, elapsed cycles, counters) is stored in the
 * try_lcore_data[] entry of the calling lcore. The argument is unused.
 *
 * Returns 0, or the error code reported by try_write().
 */
static int
try_write_lcore(__rte_unused void *data)
{
	int32_t rc;
	uint32_t attempt, self;
	uint64_t limit, start, elapsed;
	struct try_rwlock_lcore *res;

	self = rte_lcore_id();
	res = &try_lcore_data[self];
	res->type = LC_TYPE_WRLOCK;

	limit = try_rwlock_data.tick;
	start = rte_get_timer_cycles();

	do {
		for (attempt = 0; attempt != ITER_NUM; attempt++) {
			rc = try_write(self);
			if (rc == 0) {
				res->stat.success++;
			} else if (rc == -EBUSY) {
				res->stat.fail++;
				/* a busy lock is an expected outcome */
				rc = 0;
			} else {
				break;
			}
		}
		elapsed = rte_get_timer_cycles() - start;
	} while (rc == 0 && elapsed < limit);

	res->rc = rc;
	res->stat.tick = elapsed;
	return rc;
}
/*
 * Print one try_rwlock_lcore record.
 *
 * tlc is the record to print; lc is the number shown as the array index in
 * the output (callers pass either an lcore id or, for aggregated records,
 * a core count).
 *
 * Besides the raw counters, three ratios are printed: cycles per attempt,
 * cycles per successful attempt and successes per failure. For the last
 * two the divisor is clamped to at least 1.
 */
static void
print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
{
	const char *kind;
	uint64_t fail_div, success_div;
	long double cycles_per_op, cycles_per_success, success_per_fail;

	/* avoid dividing by zero when a counter was never incremented */
	fail_div = RTE_MAX(tlc->stat.fail, 1ULL);
	success_div = RTE_MAX(tlc->stat.success, 1ULL);

	cycles_per_op = (long double)tlc->stat.tick /
		(tlc->stat.fail + tlc->stat.success);
	cycles_per_success = (long double)tlc->stat.tick / success_div;
	success_per_fail = (long double)tlc->stat.success / fail_div;

	if (tlc->type == LC_TYPE_RDLOCK)
		kind = "RDLOCK";
	else
		kind = "WRLOCK";

	printf("try_lcore_data[%u]={\n"
		"\trc=%d,\n"
		"\ttype=%s,\n"
		"\tfail=%" PRIu64 ",\n"
		"\tsuccess=%" PRIu64 ",\n"
		"\tcycles=%" PRIu64 ",\n"
		"\tcycles/op=%#Lf,\n"
		"\tcycles/success=%#Lf,\n"
		"\tsuccess/fail=%#Lf,\n"
		"};\n",
		lc,
		tlc->rc,
		kind,
		tlc->stat.fail,
		tlc->stat.success,
		tlc->stat.tick,
		cycles_per_op,
		cycles_per_success,
		success_per_fail);
}
/*
 * Accumulate the counters of one lcore record into an aggregate record.
 *
 * tlc is the aggregate that is updated; lc is the per-lcore record that is
 * added to it. Only the 'stat' counters are touched.
 */
static void
collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
	const struct try_rwlock_lcore *lc)
{
	/* attempt counters */
	tlc->stat.success += lc->stat.success;
	tlc->stat.fail += lc->stat.fail;

	/* elapsed timer cycles */
	tlc->stat.tick += lc->stat.tick;
}
/*
* Process collected results:
* - check status
* - collect and print statistics
*/
static int
process_try_lcore_stats(void)
{
int32_t rc;
uint32_t lc, rd, wr;
struct try_rwlock_lcore rlc, wlc;
memset(&rlc, 0, sizeof(rlc));
memset(&wlc, 0, sizeof(wlc));
rlc.type = LC_TYPE_RDLOCK;
wlc.type = LC_TYPE_WRLOCK;
rd = 0;
wr = 0;
rc = 0;
RTE_LCORE_FOREACH(lc) {
rc |= try_lcore_data[lc].rc;
if (try_lcore_data[lc].type == LC_TYPE_RDLOCK) {
collect_try_lcore_stats(&rlc, try_lcore_data + lc);
rd++;
} else {
collect_try_lcore_stats(&wlc, try_lcore_data + lc);
wr++;
}
}
if (rc == 0) {
RTE_LCORE_FOREACH(lc)
print_try_lcore_stats(try_lcore_data + lc, lc);
if (rd != 0) {
printf("aggregated stats for %u RDLOCK cores:\n", rd);
print_try_lcore_stats(&rlc, rd);
}
if (wr != 0) {
printf("aggregated stats for %u WRLOCK cores:\n", wr);
print_try_lcore_stats(&wlc, wr);
}
}
return rc;
}
/*
 * Prepare the shared state for the next trylock test: clear the per-lcore
 * results and the shared lock/payload, then set the test duration.
 */
static void
try_test_reset(void)
{
	memset(&try_lcore_data, 0, sizeof(try_lcore_data));
	memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
	/*
	 * The lcore loops compare this value against rte_get_timer_cycles()
	 * deltas, so the duration must be expressed in ticks of that same
	 * timer: use rte_get_timer_hz(), not the TSC frequency.
	 */
	try_rwlock_data.tick = TEST_SEC * rte_get_timer_hz();
}
/*
 * all lcores grab RDLOCK
 *
 * Returns the launch error if the lcores could not be started, otherwise
 * the result of process_try_lcore_stats().
 */
static int
try_rwlock_test_rda(void)
{
	int rc;

	try_test_reset();

	/* start read test on all available lcores */
	rc = rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MASTER);
	/*
	 * Do not evaluate the (all-zero) results of a test that never ran.
	 * Per the rte_launch.h documentation no lcore has been started when
	 * the launch fails, so there is nothing to wait for.
	 */
	if (rc < 0)
		return rc;

	rte_eal_mp_wait_lcore();
	return process_try_lcore_stats();
}
/*
 * all slave lcores grab RDLOCK, master one grabs WRLOCK
 *
 * Returns the launch error if the slaves could not be started, otherwise
 * the result of process_try_lcore_stats().
 */
static int
try_rwlock_test_rds_wrm(void)
{
	int rc;

	try_test_reset();

	rc = rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MASTER);
	/*
	 * Without readers the test is meaningless; bail out instead of
	 * reporting the writer-only run as a pass. Per the rte_launch.h
	 * documentation no lcore has been started when the launch fails.
	 */
	if (rc < 0)
		return rc;

	/* the master is the only writer */
	try_write_lcore(NULL);

	rte_eal_mp_wait_lcore();
	return process_try_lcore_stats();
}
/*
 * master and even slave lcores grab RDLOCK, odd lcores grab WRLOCK
 *
 * Returns the result of process_try_lcore_stats().
 */
static int
try_rwlock_test_rde_wro(void)
{
	uint32_t lc;
	const uint32_t master = rte_get_master_lcore();

	try_test_reset();

	RTE_LCORE_FOREACH(lc) {
		/* the master joins as a reader once the slaves are running */
		if (lc == master)
			continue;

		if (lc & 1)
			rte_eal_remote_launch(try_write_lcore, NULL, lc);
		else
			rte_eal_remote_launch(try_read_lcore, NULL, lc);
	}

	try_read_lcore(NULL);

	rte_eal_mp_wait_lcore();
	return process_try_lcore_stats();
}
/*
 * Entry point of the rwlock autotest.
 *
 * Runs every sub-test in turn, printing its name before and its status
 * after. A failing sub-test does not stop the remaining ones.
 *
 * Returns the bitwise OR of all sub-test statuses, i.e. 0 only if every
 * sub-test returned 0.
 */
static int
test_rwlock(void)
{
	static const struct {
		const char *name;
		int (*ftst)(void);
	} cases[] = {
		{ "rwlock_test1", rwlock_test1 },
		{ "try_rwlock_test_rda", try_rwlock_test_rda },
		{ "try_rwlock_test_rds_wrm", try_rwlock_test_rds_wrm },
		{ "try_rwlock_test_rde_wro", try_rwlock_test_rde_wro },
	};

	int32_t status, overall = 0;
	uint32_t n;

	for (n = 0; n < RTE_DIM(cases); n++) {
		printf("starting test %s;\n", cases[n].name);
		status = cases[n].ftst();
		printf("test %s completed with status %d\n",
			cases[n].name, status);
		overall |= status;
	}

	return overall;
}
/*
 * Associate test_rwlock() with the command name rwlock_autotest.
 * NOTE(review): REGISTER_TEST_COMMAND is not defined in this file;
 * presumably it comes from test.h - confirm there how the command is exposed.
 */
REGISTER_TEST_COMMAND(rwlock_autotest, test_rwlock);