ethdev: change vtune profiling approach

This patch changes the rx_burst profiling approach:
	1. VTune's instrumentation is removed
	2. an empty hook callback for profiling is added
This way, all VTune-specific logic moves to the VTune side.
The hook is enabled only when the CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE
option is turned on. VTune uses this hook to attach to the polling cycle,
because it cannot attach to rx_burst directly: rx_burst is inline.

Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
Acked-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
This commit is contained in:
Ilia Kurakin 2018-07-19 15:21:42 +03:00 committed by Ferruh Yigit
parent 5394547798
commit 2c1bbab7f0
5 changed files with 25 additions and 124 deletions

View File

@ -128,7 +128,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
#
# Turn off Tx preparation stage

View File

@ -33,38 +33,12 @@ Refer to the
for details about application profiling.
Empty cycles tracing
Profiling with VTune
~~~~~~~~~~~~~~~~~~~~
Iterations that yielded no RX packets (empty cycles, wasted iterations) can
be analyzed using VTune Amplifier. This profiling employs the
`Instrumentation and Tracing Technology (ITT) API
<https://software.intel.com/en-us/node/544195>`_
feature of VTune Amplifier and requires only reconfiguring the DPDK library,
no changes in a DPDK application are needed.
To trace wasted iterations on RX queues, first reconfigure DPDK with
``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
Then rebuild DPDK, specifying paths to the ITT header and library, which can
be found in any VTune Amplifier distribution in the *include* and *lib*
directories respectively:
.. code-block:: console
make EXTRA_CFLAGS=-I<path to ittnotify.h> \
EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
Finally, to see wasted iterations in your performance analysis results,
select the *"Analyze user tasks, events, and counters"* checkbox in the
*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
Alternatively, when running VTune Amplifier via command line, specify
``-knob enable-user-tasks=true`` option.
Collected regions of wasted iterations will be marked on VTune Amplifier's
timeline as ITT tasks. These ITT tasks have predefined names, containing
Ethernet device and RX queue identifiers.
To allow VTune to attach to the DPDK application, reconfigure and recompile
DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
Profiling on ARM64

View File

@ -1,87 +1,33 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2017 Intel Corporation
* Copyright(c) 2010-2018 Intel Corporation
*/
#include "ethdev_profile.h"
/**
* This conditional block enables RX queues profiling by tracking wasted
* iterations, i.e. iterations which yielded no RX packets. Profiling is
* performed using the Instrumentation and Tracing Technology (ITT) API,
* employed by the Intel (R) VTune (TM) Amplifier.
* This conditional block enables Ethernet device profiling with
* Intel (R) VTune (TM) Amplifier.
*/
#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
#include <ittnotify.h>
#define ITT_MAX_NAME_LEN (100)
#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
/**
* Auxiliary ITT structure belonging to Ethernet device and using to:
* - track RX queue state to determine whether it is wasting loop iterations
* - begin or end ITT task using task domain and task name (handle)
* Hook callback to trace rte_eth_rx_burst() calls.
*/
struct itt_profile_rx_data {
/**
* ITT domains for each queue.
*/
__itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
/**
* ITT task names for each queue.
*/
__itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
/**
* Flags indicating the queues state. Possible values:
* 1 - queue is wasting iterations,
* 0 - otherwise.
*/
uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
};
/**
* The pool of *itt_profile_rx_data* structures.
*/
struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
/**
* This callback function manages ITT tasks collection on given port and queue.
* It must be registered with rte_eth_add_rx_callback() to be called from
* rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
* type declaration.
*/
static uint16_t
collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
uint16_t
profile_hook_rx_burst_cb(
__rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
{
if (unlikely(nb_pkts == 0)) {
if (!itt_rx_data[port_id].queue_state[queue_id]) {
__itt_task_begin(
itt_rx_data[port_id].domains[queue_id],
__itt_null, __itt_null,
itt_rx_data[port_id].handles[queue_id]);
itt_rx_data[port_id].queue_state[queue_id] = 1;
}
} else {
if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
__itt_task_end(
itt_rx_data[port_id].domains[queue_id]);
itt_rx_data[port_id].queue_state[queue_id] = 0;
}
}
return nb_pkts;
}
/**
* Initialization of itt_profile_rx_data for a given Ethernet device.
* Sets the profiling Rx callback for a given Ethernet device.
* This function must be invoked when the Ethernet device is being configured.
* Result will be stored in the global array *itt_rx_data*.
*
* @param port_id
* The port identifier of the Ethernet device.
* @param port_name
* The name of the Ethernet device.
* @param rx_queue_num
* The number of RX queues on specified port.
*
@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
* - On failure, a negative value.
*/
static inline int
itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
{
uint16_t q_id;
for (q_id = 0; q_id < rx_queue_num; ++q_id) {
char domain_name[ITT_MAX_NAME_LEN];
snprintf(domain_name, sizeof(domain_name),
"RXBurst.WastedIterations.Port_%s.Queue_%d",
port_name, q_id);
itt_rx_data[port_id].domains[q_id]
= __itt_domain_create(domain_name);
char task_name[ITT_MAX_NAME_LEN];
snprintf(task_name, sizeof(task_name),
"port id: %d; queue id: %d",
port_id, q_id);
itt_rx_data[port_id].handles[q_id]
= __itt_string_handle_create(task_name);
itt_rx_data[port_id].queue_state[q_id] = 0;
if (!rte_eth_add_rx_callback(
port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
return -rte_errno;
}
}
return 0;
}
#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
int
__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
__rte_unused struct rte_eth_dev *dev)
{
#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
return itt_profile_rx_init(
port_id, dev->data->name, dev->data->nb_rx_queues);
#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
#endif
return 0;
}

View File

@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2017 Intel Corporation
* Copyright(c) 2010-2018 Intel Corporation
*/
#ifndef _RTE_ETHDEV_PROFILE_H_
@ -8,7 +8,7 @@
#include "rte_ethdev.h"
/**
* Initialization of profiling RX queues for the Ethernet device.
* Initialization of the Ethernet device profiling.
* Implementation of this function depends on chosen profiling method,
* defined in configs.
*
@ -22,6 +22,6 @@
* - On failure, a negative value.
*/
int
__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev);
#endif

View File

@ -1191,9 +1191,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
}
/* Initialize Rx profiling if enabled at compilation time. */
diag = __rte_eth_profile_rx_init(port_id, dev);
diag = __rte_eth_dev_profile_init(port_id, dev);
if (diag != 0) {
RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n",
port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);