57305d794e
Use service cores for offloading event scheduling in case of centralized scheduling instead of calling the schedule api directly. This removes the dependency on dedicated scheduler core specified by giving command line option --slcore. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> Acked-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
455 lines
11 KiB
C
455 lines
11 KiB
C
/*
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright (C) Cavium, Inc 2017.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Cavium, Inc nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "test_perf_common.h"
|
|
|
|
int
|
|
perf_test_result(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
RTE_SET_USED(opt);
|
|
struct test_perf *t = evt_test_priv(test);
|
|
|
|
return t->result;
|
|
}
|
|
|
|
static inline int
|
|
perf_producer(void *arg)
|
|
{
|
|
struct prod_data *p = arg;
|
|
struct test_perf *t = p->t;
|
|
struct evt_options *opt = t->opt;
|
|
const uint8_t dev_id = p->dev_id;
|
|
const uint8_t port = p->port_id;
|
|
struct rte_mempool *pool = t->pool;
|
|
const uint64_t nb_pkts = t->nb_pkts;
|
|
const uint32_t nb_flows = t->nb_flows;
|
|
uint32_t flow_counter = 0;
|
|
uint64_t count = 0;
|
|
struct perf_elt *m;
|
|
struct rte_event ev;
|
|
|
|
if (opt->verbose_level > 1)
|
|
printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
|
|
rte_lcore_id(), dev_id, port, p->queue_id);
|
|
|
|
ev.event = 0;
|
|
ev.op = RTE_EVENT_OP_NEW;
|
|
ev.queue_id = p->queue_id;
|
|
ev.sched_type = t->opt->sched_type_list[0];
|
|
ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
|
|
ev.event_type = RTE_EVENT_TYPE_CPU;
|
|
ev.sub_event_type = 0; /* stage 0 */
|
|
|
|
while (count < nb_pkts && t->done == false) {
|
|
if (rte_mempool_get(pool, (void **)&m) < 0)
|
|
continue;
|
|
|
|
ev.flow_id = flow_counter++ % nb_flows;
|
|
ev.event_ptr = m;
|
|
m->timestamp = rte_get_timer_cycles();
|
|
while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
|
|
if (t->done)
|
|
break;
|
|
rte_pause();
|
|
m->timestamp = rte_get_timer_cycles();
|
|
}
|
|
count++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline uint64_t
|
|
processed_pkts(struct test_perf *t)
|
|
{
|
|
uint8_t i;
|
|
uint64_t total = 0;
|
|
|
|
rte_smp_rmb();
|
|
for (i = 0; i < t->nb_workers; i++)
|
|
total += t->worker[i].processed_pkts;
|
|
|
|
return total;
|
|
}
|
|
|
|
static inline uint64_t
|
|
total_latency(struct test_perf *t)
|
|
{
|
|
uint8_t i;
|
|
uint64_t total = 0;
|
|
|
|
rte_smp_rmb();
|
|
for (i = 0; i < t->nb_workers; i++)
|
|
total += t->worker[i].latency;
|
|
|
|
return total;
|
|
}
|
|
|
|
|
|
int
|
|
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
|
|
int (*worker)(void *))
|
|
{
|
|
int ret, lcore_id;
|
|
struct test_perf *t = evt_test_priv(test);
|
|
|
|
int port_idx = 0;
|
|
/* launch workers */
|
|
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
|
|
if (!(opt->wlcores[lcore_id]))
|
|
continue;
|
|
|
|
ret = rte_eal_remote_launch(worker,
|
|
&t->worker[port_idx], lcore_id);
|
|
if (ret) {
|
|
evt_err("failed to launch worker %d", lcore_id);
|
|
return ret;
|
|
}
|
|
port_idx++;
|
|
}
|
|
|
|
/* launch producers */
|
|
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
|
|
if (!(opt->plcores[lcore_id]))
|
|
continue;
|
|
|
|
ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
|
|
lcore_id);
|
|
if (ret) {
|
|
evt_err("failed to launch perf_producer %d", lcore_id);
|
|
return ret;
|
|
}
|
|
port_idx++;
|
|
}
|
|
|
|
const uint64_t total_pkts = opt->nb_pkts *
|
|
evt_nr_active_lcores(opt->plcores);
|
|
|
|
uint64_t dead_lock_cycles = rte_get_timer_cycles();
|
|
int64_t dead_lock_remaining = total_pkts;
|
|
const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;
|
|
|
|
uint64_t perf_cycles = rte_get_timer_cycles();
|
|
int64_t perf_remaining = total_pkts;
|
|
const uint64_t perf_sample = rte_get_timer_hz();
|
|
|
|
static float total_mpps;
|
|
static uint64_t samples;
|
|
|
|
const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
|
|
int64_t remaining = t->outstand_pkts - processed_pkts(t);
|
|
|
|
while (t->done == false) {
|
|
const uint64_t new_cycles = rte_get_timer_cycles();
|
|
|
|
if ((new_cycles - perf_cycles) > perf_sample) {
|
|
const uint64_t latency = total_latency(t);
|
|
const uint64_t pkts = processed_pkts(t);
|
|
|
|
remaining = t->outstand_pkts - pkts;
|
|
float mpps = (float)(perf_remaining-remaining)/1000000;
|
|
|
|
perf_remaining = remaining;
|
|
perf_cycles = new_cycles;
|
|
total_mpps += mpps;
|
|
++samples;
|
|
if (opt->fwd_latency && pkts > 0) {
|
|
printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
|
|
mpps, total_mpps/samples,
|
|
(float)(latency/pkts)/freq_mhz);
|
|
} else {
|
|
printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
|
|
mpps, total_mpps/samples);
|
|
}
|
|
fflush(stdout);
|
|
|
|
if (remaining <= 0) {
|
|
t->done = true;
|
|
t->result = EVT_TEST_SUCCESS;
|
|
rte_smp_wmb();
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (new_cycles - dead_lock_cycles > dead_lock_sample) {
|
|
remaining = t->outstand_pkts - processed_pkts(t);
|
|
if (dead_lock_remaining == remaining) {
|
|
rte_event_dev_dump(opt->dev_id, stdout);
|
|
evt_err("No schedules for seconds, deadlock");
|
|
t->done = true;
|
|
rte_smp_wmb();
|
|
break;
|
|
}
|
|
dead_lock_remaining = remaining;
|
|
dead_lock_cycles = new_cycles;
|
|
}
|
|
}
|
|
printf("\n");
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
|
|
uint8_t stride, uint8_t nb_queues)
|
|
{
|
|
struct test_perf *t = evt_test_priv(test);
|
|
uint8_t port, prod;
|
|
int ret = -1;
|
|
|
|
/* port configuration */
|
|
const struct rte_event_port_conf wkr_p_conf = {
|
|
.dequeue_depth = opt->wkr_deq_dep,
|
|
.enqueue_depth = 64,
|
|
.new_event_threshold = 4096,
|
|
};
|
|
|
|
/* setup one port per worker, linking to all queues */
|
|
for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
|
|
port++) {
|
|
struct worker_data *w = &t->worker[port];
|
|
|
|
w->dev_id = opt->dev_id;
|
|
w->port_id = port;
|
|
w->t = t;
|
|
w->processed_pkts = 0;
|
|
w->latency = 0;
|
|
|
|
ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
|
|
if (ret) {
|
|
evt_err("failed to setup port %d", port);
|
|
return ret;
|
|
}
|
|
|
|
ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
|
|
if (ret != nb_queues) {
|
|
evt_err("failed to link all queues to port %d", port);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
/* port for producers, no links */
|
|
const struct rte_event_port_conf prod_conf = {
|
|
.dequeue_depth = 8,
|
|
.enqueue_depth = 32,
|
|
.new_event_threshold = 1200,
|
|
};
|
|
prod = 0;
|
|
for ( ; port < perf_nb_event_ports(opt); port++) {
|
|
struct prod_data *p = &t->prod[port];
|
|
|
|
p->dev_id = opt->dev_id;
|
|
p->port_id = port;
|
|
p->queue_id = prod * stride;
|
|
p->t = t;
|
|
|
|
ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
|
|
if (ret) {
|
|
evt_err("failed to setup port %d", port);
|
|
return ret;
|
|
}
|
|
prod++;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int
|
|
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
|
|
{
|
|
unsigned int lcores;
|
|
|
|
/* N producer + N worker + 1 master */
|
|
lcores = 3;
|
|
|
|
if (rte_lcore_count() < lcores) {
|
|
evt_err("test need minimum %d lcores", lcores);
|
|
return -1;
|
|
}
|
|
|
|
/* Validate worker lcores */
|
|
if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
|
|
evt_err("worker lcores overlaps with master lcore");
|
|
return -1;
|
|
}
|
|
if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
|
|
evt_err("worker lcores overlaps producer lcores");
|
|
return -1;
|
|
}
|
|
if (evt_has_disabled_lcore(opt->wlcores)) {
|
|
evt_err("one or more workers lcores are not enabled");
|
|
return -1;
|
|
}
|
|
if (!evt_has_active_lcore(opt->wlcores)) {
|
|
evt_err("minimum one worker is required");
|
|
return -1;
|
|
}
|
|
|
|
/* Validate producer lcores */
|
|
if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
|
|
evt_err("producer lcores overlaps with master lcore");
|
|
return -1;
|
|
}
|
|
if (evt_has_disabled_lcore(opt->plcores)) {
|
|
evt_err("one or more producer lcores are not enabled");
|
|
return -1;
|
|
}
|
|
if (!evt_has_active_lcore(opt->plcores)) {
|
|
evt_err("minimum one producer is required");
|
|
return -1;
|
|
}
|
|
|
|
if (evt_has_invalid_stage(opt))
|
|
return -1;
|
|
|
|
if (evt_has_invalid_sched_type(opt))
|
|
return -1;
|
|
|
|
if (nb_queues > EVT_MAX_QUEUES) {
|
|
evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
|
|
return -1;
|
|
}
|
|
if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
|
|
evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
|
|
return -1;
|
|
}
|
|
|
|
/* Fixups */
|
|
if (opt->nb_stages == 1 && opt->fwd_latency) {
|
|
evt_info("fwd_latency is valid when nb_stages > 1, disabling");
|
|
opt->fwd_latency = 0;
|
|
}
|
|
if (opt->fwd_latency && !opt->q_priority) {
|
|
evt_info("enabled queue priority for latency measurement");
|
|
opt->q_priority = 1;
|
|
}
|
|
if (opt->nb_pkts == 0)
|
|
opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
|
|
{
|
|
evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
|
|
evt_dump_producer_lcores(opt);
|
|
evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
|
|
evt_dump_worker_lcores(opt);
|
|
evt_dump_nb_stages(opt);
|
|
evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
|
|
evt_dump("nb_evdev_queues", "%d", nb_queues);
|
|
evt_dump_queue_priority(opt);
|
|
evt_dump_sched_type_list(opt);
|
|
}
|
|
|
|
void
|
|
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
RTE_SET_USED(test);
|
|
|
|
rte_event_dev_stop(opt->dev_id);
|
|
rte_event_dev_close(opt->dev_id);
|
|
}
|
|
|
|
static inline void
|
|
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
|
|
void *obj, unsigned i __rte_unused)
|
|
{
|
|
memset(obj, 0, mp->elt_size);
|
|
}
|
|
|
|
int
|
|
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
struct test_perf *t = evt_test_priv(test);
|
|
|
|
t->pool = rte_mempool_create(test->name, /* mempool name */
|
|
opt->pool_sz, /* number of elements*/
|
|
sizeof(struct perf_elt), /* element size*/
|
|
512, /* cache size*/
|
|
0, NULL, NULL,
|
|
perf_elt_init, /* obj constructor */
|
|
NULL, opt->socket_id, 0); /* flags */
|
|
if (t->pool == NULL) {
|
|
evt_err("failed to create mempool");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
RTE_SET_USED(opt);
|
|
struct test_perf *t = evt_test_priv(test);
|
|
|
|
rte_mempool_free(t->pool);
|
|
}
|
|
|
|
int
|
|
perf_test_setup(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
void *test_perf;
|
|
|
|
test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
|
|
RTE_CACHE_LINE_SIZE, opt->socket_id);
|
|
if (test_perf == NULL) {
|
|
evt_err("failed to allocate test_perf memory");
|
|
goto nomem;
|
|
}
|
|
test->test_priv = test_perf;
|
|
|
|
struct test_perf *t = evt_test_priv(test);
|
|
|
|
t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
|
|
t->nb_workers = evt_nr_active_lcores(opt->wlcores);
|
|
t->done = false;
|
|
t->nb_pkts = opt->nb_pkts;
|
|
t->nb_flows = opt->nb_flows;
|
|
t->result = EVT_TEST_FAILED;
|
|
t->opt = opt;
|
|
memcpy(t->sched_type_list, opt->sched_type_list,
|
|
sizeof(opt->sched_type_list));
|
|
return 0;
|
|
nomem:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
void
|
|
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
|
|
{
|
|
RTE_SET_USED(opt);
|
|
|
|
rte_free(test->test_priv);
|
|
}
|