ethdev: change vtune profiling approach
The patch changes the rx_burst profiling approach: 1. VTune's instrumentation is removed. 2. An empty hook callback for profiling is added. This way, all VTune-specific logic moves to the VTune side. The hook is enabled only when the CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE option is turned on. VTune uses this hook to attach to the polling cycle. It is not possible to attach to rx_burst directly, as it is inline. Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com> Acked-by: Keith Wiles <keith.wiles@intel.com> Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
This commit is contained in:
parent
5394547798
commit
2c1bbab7f0
@ -128,7 +128,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
|
||||
CONFIG_RTE_LIBRTE_IEEE1588=n
|
||||
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
|
||||
CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
|
||||
CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
|
||||
CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
|
||||
|
||||
#
|
||||
# Turn off Tx preparation stage
|
||||
|
@ -33,38 +33,12 @@ Refer to the
|
||||
for details about application profiling.
|
||||
|
||||
|
||||
Empty cycles tracing
|
||||
Profiling with VTune
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Iterations that yielded no RX packets (empty cycles, wasted iterations) can
|
||||
be analyzed using VTune Amplifier. This profiling employs the
|
||||
`Instrumentation and Tracing Technology (ITT) API
|
||||
<https://software.intel.com/en-us/node/544195>`_
|
||||
feature of VTune Amplifier and requires only reconfiguring the DPDK library,
|
||||
no changes in a DPDK application are needed.
|
||||
|
||||
To trace wasted iterations on RX queues, first reconfigure DPDK with
|
||||
``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
|
||||
``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
|
||||
|
||||
Then rebuild DPDK, specifying paths to the ITT header and library, which can
|
||||
be found in any VTune Amplifier distribution in the *include* and *lib*
|
||||
directories respectively:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make EXTRA_CFLAGS=-I<path to ittnotify.h> \
|
||||
EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
|
||||
|
||||
Finally, to see wasted iterations in your performance analysis results,
|
||||
select the *"Analyze user tasks, events, and counters"* checkbox in the
|
||||
*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
|
||||
Alternatively, when running VTune Amplifier via command line, specify
|
||||
``-knob enable-user-tasks=true`` option.
|
||||
|
||||
Collected regions of wasted iterations will be marked on VTune Amplifier's
|
||||
timeline as ITT tasks. These ITT tasks have predefined names, containing
|
||||
Ethernet device and RX queue identifiers.
|
||||
To allow VTune to attach to the DPDK application, reconfigure and recompile
|
||||
DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
|
||||
``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
|
||||
|
||||
|
||||
Profiling on ARM64
|
||||
|
@ -1,87 +1,33 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2010-2017 Intel Corporation
|
||||
* Copyright(c) 2010-2018 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "ethdev_profile.h"
|
||||
|
||||
/**
|
||||
* This conditional block enables RX queues profiling by tracking wasted
|
||||
* iterations, i.e. iterations which yielded no RX packets. Profiling is
|
||||
* performed using the Instrumentation and Tracing Technology (ITT) API,
|
||||
* employed by the Intel (R) VTune (TM) Amplifier.
|
||||
* This conditional block enables Ethernet device profiling with
|
||||
* Intel (R) VTune (TM) Amplifier.
|
||||
*/
|
||||
#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
|
||||
|
||||
#include <ittnotify.h>
|
||||
|
||||
#define ITT_MAX_NAME_LEN (100)
|
||||
#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
|
||||
|
||||
/**
|
||||
* Auxiliary ITT structure belonging to an Ethernet device and used to:
|
||||
* - track RX queue state to determine whether it is wasting loop iterations
|
||||
* - begin or end ITT task using task domain and task name (handle)
|
||||
* Hook callback to trace rte_eth_rx_burst() calls.
|
||||
*/
|
||||
struct itt_profile_rx_data {
|
||||
/**
|
||||
* ITT domains for each queue.
|
||||
*/
|
||||
__itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
|
||||
/**
|
||||
* ITT task names for each queue.
|
||||
*/
|
||||
__itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
|
||||
/**
|
||||
* Flags indicating the queues state. Possible values:
|
||||
* 1 - queue is wasting iterations,
|
||||
* 0 - otherwise.
|
||||
*/
|
||||
uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
|
||||
};
|
||||
|
||||
/**
|
||||
* The pool of *itt_profile_rx_data* structures.
|
||||
*/
|
||||
struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
|
||||
|
||||
|
||||
/**
|
||||
* This callback function manages ITT tasks collection on given port and queue.
|
||||
* It must be registered with rte_eth_add_rx_callback() to be called from
|
||||
* rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
|
||||
* type declaration.
|
||||
*/
|
||||
static uint16_t
|
||||
collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
|
||||
uint16_t
|
||||
profile_hook_rx_burst_cb(
|
||||
__rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
|
||||
__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
|
||||
__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
|
||||
{
|
||||
if (unlikely(nb_pkts == 0)) {
|
||||
if (!itt_rx_data[port_id].queue_state[queue_id]) {
|
||||
__itt_task_begin(
|
||||
itt_rx_data[port_id].domains[queue_id],
|
||||
__itt_null, __itt_null,
|
||||
itt_rx_data[port_id].handles[queue_id]);
|
||||
itt_rx_data[port_id].queue_state[queue_id] = 1;
|
||||
}
|
||||
} else {
|
||||
if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
|
||||
__itt_task_end(
|
||||
itt_rx_data[port_id].domains[queue_id]);
|
||||
itt_rx_data[port_id].queue_state[queue_id] = 0;
|
||||
}
|
||||
}
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialization of itt_profile_rx_data for a given Ethernet device.
|
||||
* Setting profiling rx callback for a given Ethernet device.
|
||||
* This function must be invoked when the Ethernet device is being configured.
|
||||
* Result will be stored in the global array *itt_rx_data*.
|
||||
*
|
||||
* @param port_id
|
||||
* The port identifier of the Ethernet device.
|
||||
* @param port_name
|
||||
* The name of the Ethernet device.
|
||||
* @param rx_queue_num
|
||||
* The number of RX queues on specified port.
|
||||
*
|
||||
@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
|
||||
* - On failure, a negative value.
|
||||
*/
|
||||
static inline int
|
||||
itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
|
||||
vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
|
||||
{
|
||||
uint16_t q_id;
|
||||
|
||||
for (q_id = 0; q_id < rx_queue_num; ++q_id) {
|
||||
char domain_name[ITT_MAX_NAME_LEN];
|
||||
|
||||
snprintf(domain_name, sizeof(domain_name),
|
||||
"RXBurst.WastedIterations.Port_%s.Queue_%d",
|
||||
port_name, q_id);
|
||||
itt_rx_data[port_id].domains[q_id]
|
||||
= __itt_domain_create(domain_name);
|
||||
|
||||
char task_name[ITT_MAX_NAME_LEN];
|
||||
|
||||
snprintf(task_name, sizeof(task_name),
|
||||
"port id: %d; queue id: %d",
|
||||
port_id, q_id);
|
||||
itt_rx_data[port_id].handles[q_id]
|
||||
= __itt_string_handle_create(task_name);
|
||||
|
||||
itt_rx_data[port_id].queue_state[q_id] = 0;
|
||||
|
||||
if (!rte_eth_add_rx_callback(
|
||||
port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
|
||||
port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
|
||||
return -rte_errno;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
|
||||
#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
|
||||
|
||||
int
|
||||
__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
|
||||
__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
|
||||
__rte_unused struct rte_eth_dev *dev)
|
||||
{
|
||||
#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
|
||||
return itt_profile_rx_init(
|
||||
port_id, dev->data->name, dev->data->nb_rx_queues);
|
||||
#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
|
||||
return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2010-2017 Intel Corporation
|
||||
* Copyright(c) 2010-2018 Intel Corporation
|
||||
*/
|
||||
|
||||
#ifndef _RTE_ETHDEV_PROFILE_H_
|
||||
@ -8,7 +8,7 @@
|
||||
#include "rte_ethdev.h"
|
||||
|
||||
/**
|
||||
* Initialization of profiling RX queues for the Ethernet device.
|
||||
* Initialization of the Ethernet device profiling.
|
||||
* Implementation of this function depends on chosen profiling method,
|
||||
* defined in configs.
|
||||
*
|
||||
@ -22,6 +22,6 @@
|
||||
* - On failure, a negative value.
|
||||
*/
|
||||
int
|
||||
__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
|
||||
__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev);
|
||||
|
||||
#endif
|
||||
|
@ -1191,9 +1191,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
|
||||
}
|
||||
|
||||
/* Initialize Rx profiling if enabled at compilation time. */
|
||||
diag = __rte_eth_profile_rx_init(port_id, dev);
|
||||
diag = __rte_eth_dev_profile_init(port_id, dev);
|
||||
if (diag != 0) {
|
||||
RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
|
||||
RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n",
|
||||
port_id, diag);
|
||||
rte_eth_dev_rx_queue_config(dev, 0);
|
||||
rte_eth_dev_tx_queue_config(dev, 0);
|
||||
|
Loading…
x
Reference in New Issue
Block a user