numam-dpdk/app/test/test_mempool_perf.c
Bruce Richardson a9de470cc7 test: move to app directory
Since all other apps have been moved to the "app" folder, the autotest app
remains alone in the test folder. Rather than having an entire top-level
folder for this, we can move it back to where it all started in early
versions of DPDK - the "app/" folder.

This move has a couple of advantages:
* This reduces clutter at the top level of the project, due to one less
  folder.
* It eliminates the separate build task ("make test-build") previously
  needed to build the autotests with make, which means that developers
  are less likely to miss something in their own compilation tests.
* It re-aligns the final location of the test binary in the app folder
  when building with make with its location in the source tree.

For meson builds, the autotest app is different from the other apps in that
it needs a series of test cases defined for use by "meson test". Therefore,
it is not built as part of the main loop in the app folder, but is built
separately at the end.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
2019-02-26 15:29:27 +01:00


/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>
#include "test.h"
/*
 * Mempool performance
 * ===================
 *
 *    Each core gets *n_keep* objects in bulks of *n_get_bulk*. Then the
 *    objects are put back in the pool in bulks of *n_put_bulk*.
 *
 *    This sequence is repeated for TIME_S seconds.
 *
 *    This test is done on the following configurations:
 *
 *    - Cores configuration (*cores*)
 *
 *      - One core with cache
 *      - Two cores with cache
 *      - Max. cores with cache
 *      - One core without cache
 *      - Two cores without cache
 *      - Max. cores without cache
 *      - One core with user-owned cache
 *      - Two cores with user-owned cache
 *      - Max. cores with user-owned cache
 *
 *    - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *      - Bulk get from 1 to 32
 *      - Bulk put from 1 to 32
 *
 *    - Number of kept objects (*n_keep*)
 *
 *      - 32
 *      - 128
 */
#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
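/*
 * Added note: the pool below is sized so that every lcore can simultaneously
 * hold MAX_KEEP objects plus a full per-lcore cache (RTE_MEMPOOL_CACHE_MAX_SIZE)
 * without exhausting the pool. The size is evaluated at runtime because it
 * depends on rte_lcore_count().
 */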
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do { \
		LOG_ERR(); \
		return -1; \
	} while (0)
#define GOTO_ERR(var, label) do { \
		LOG_ERR(); \
		var = -1; \
		goto label; \
	} while (0)
static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static rte_atomic32_t synchro;
/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;
/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;
/* number of enqueues / dequeues */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;
static struct mempool_test_stats stats[RTE_MAX_LCORE];
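/*
 * Added note: stats[] is indexed by lcore id and each entry is cache-line
 * aligned (__rte_cache_aligned), so an lcore updating its own counter does
 * not share a cache line with a neighbouring entry (avoids false sharing).
 */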
/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;

	memset(obj, 0, mp->elt_size);
	*objnum = i;
}
static int
per_lcore_mempool_test(void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}
	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* slave lcores busy-wait until the master lcore sets synchro */
	if (lcore_id != rte_get_master_lcore())
		while (rte_atomic32_read(&synchro) == 0);
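	/*
	 * Added note: timed measurement loop. For TIME_S seconds, repeatedly
	 * dequeue n_keep objects in bulks of n_get_bulk and put them back in
	 * bulks of n_put_bulk, adding N to this lcore's enq_count after each
	 * completed pass over N objects.
	 */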
	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects in bulks of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
							      &obj_table[idx],
							      n_get_bulk,
							      cache);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back in bulks of n_put_bulk */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
							n_put_bulk,
							cache);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}
out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}
/* launch all the per-lcore test, and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on master */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}
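	/*
	 * Added note: the reported figure aggregates all participating
	 * lcores. Each lcore's enq_count is the number of objects it cycled
	 * through the pool during the run, so summing enq_count / TIME_S
	 * over all lcores gives objects per second.
	 */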
	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}
/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}
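/*
 * Added note: each call to do_one_mempool_test() iterates the full parameter
 * matrix: 3 get-bulk sizes x 3 put-bulk sizes x 2 keep values = 18
 * combinations, each measured for TIME_S seconds by launch_cores(). Every
 * run started from test_mempool_perf() below therefore takes at least
 * 18 * TIME_S seconds per core count.
 */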
static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;
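	/*
	 * Added note (assumption about the library behaviour):
	 * rte_mbuf_best_mempool_ops() returns the mempool ops name that mbuf
	 * pools would use on this platform - a user- or platform-registered
	 * handler if one is set, otherwise the compile-time default,
	 * typically a ring-based handler. The exact selection rules may
	 * differ between DPDK versions.
	 */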
	default_pool_ops = rte_mbuf_best_mempool_ops();

	/* Create a mempool based on Default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);
	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
			< 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;
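	/*
	 * Added note: the user-owned cache runs reuse the cache-less pool.
	 * With use_external_cache set, each lcore creates its own cache via
	 * rte_mempool_cache_create() in per_lcore_mempool_test() and flushes
	 * and frees it when done.
	 */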
	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}
REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);
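/*
 * Usage sketch (assumption, not part of the original source): after building
 * the dpdk-test binary, this test can typically be run from its interactive
 * prompt, e.g. "RTE>>mempool_perf_autotest", or selected via the DPDK_TEST
 * environment variable. The exact invocation depends on the DPDK version and
 * build system.
 */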