event/dlb: add token pop API

The PMD exposes a public API that allows applications to
control the token pop mode of a port. The supported token pop
modes are as follows; they affect core scheduling affinity for
load-balanced (ldb) ports.

AUTO_POP: Pop the CQ tokens immediately after dequeueing.
DELAYED_POP: Pop CQ tokens after (dequeue_depth - 1) events
	     are released. Supported on load-balanced ports
	     only.
DEFERRED_POP: Pop the CQ tokens during next dequeue operation.

Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
Reviewed-by: Gage Eads <gage.eads@intel.com>
This commit is contained in:
Timothy McDaniel 2020-11-01 17:30:12 -06:00 committed by Jerin Jacob
parent f007362194
commit b287267d62
8 changed files with 308 additions and 10 deletions

View File

@ -53,6 +53,7 @@ The public API headers are grouped by topics:
[dpaa2_cmdif] (@ref rte_pmd_dpaa2_cmdif.h),
[dpaa2_qdma] (@ref rte_pmd_dpaa2_qdma.h),
[crypto_scheduler] (@ref rte_cryptodev_scheduler.h),
[dlb] (@ref rte_pmd_dlb.h),
[dlb2] (@ref rte_pmd_dlb2.h)
- **memory**:

View File

@ -7,6 +7,7 @@ USE_MDFILE_AS_MAINPAGE = @TOPDIR@/doc/api/doxy-api-index.md
INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/drivers/bus/vdev \
@TOPDIR@/drivers/crypto/scheduler \
@TOPDIR@/drivers/event/dlb \
@TOPDIR@/drivers/event/dlb2 \
@TOPDIR@/drivers/mempool/dpaa2 \
@TOPDIR@/drivers/net/ark \

View File

@ -72,6 +72,25 @@ static struct rte_event_dev_info evdev_dlb_default_info = {
struct process_local_port_data
dlb_port[DLB_MAX_NUM_PORTS][NUM_DLB_PORT_TYPES];
/* Forward declarations of the delayed-token-pop enqueue entry points.
 * They are defined further down in this file, but dlb_hw_create_ldb_port()
 * installs them as the device's enqueue callbacks (for ports configured in
 * DELAYED_POP mode) before those definitions appear.
 *
 * NOTE(review): the new_burst and forward_burst variants are declared
 * "static inline" here but their definitions are spelled "static" only;
 * the two spellings could be aligned.
 */
static inline uint16_t
dlb_event_enqueue_delayed(void *event_port,
const struct rte_event events[]);
static inline uint16_t
dlb_event_enqueue_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num);
static inline uint16_t
dlb_event_enqueue_new_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num);
static inline uint16_t
dlb_event_enqueue_forward_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num);
static int
dlb_hw_query_resources(struct dlb_eventdev *dlb)
{
@ -1003,6 +1022,33 @@ dlb_hw_create_ldb_port(struct dlb_eventdev *dlb,
qm_port->dequeue_depth = dequeue_depth;
/* When using the reserved token scheme, token_pop_thresh is
* initially 2 * dequeue_depth. Once the tokens are reserved,
* the enqueue code re-assigns it to dequeue_depth.
*/
qm_port->token_pop_thresh = cq_depth;
/* When the deferred scheduling vdev arg is selected, use deferred pop
* for all single-entry CQs.
*/
if (cfg.cq_depth == 1 || (cfg.cq_depth == 2 && use_rsvd_token_scheme)) {
if (dlb->defer_sched)
qm_port->token_pop_mode = DEFERRED_POP;
}
/* The default enqueue functions do not include delayed-pop support for
* performance reasons.
*/
if (qm_port->token_pop_mode == DELAYED_POP) {
dlb->event_dev->enqueue = dlb_event_enqueue_delayed;
dlb->event_dev->enqueue_burst =
dlb_event_enqueue_burst_delayed;
dlb->event_dev->enqueue_new_burst =
dlb_event_enqueue_new_burst_delayed;
dlb->event_dev->enqueue_forward_burst =
dlb_event_enqueue_forward_burst_delayed;
}
qm_port->owed_tokens = 0;
qm_port->issued_releases = 0;
@ -1163,6 +1209,8 @@ dlb_hw_create_dir_port(struct dlb_eventdev *dlb,
qm_port->dequeue_depth = dequeue_depth;
/* Directed ports are auto-pop, by default. */
qm_port->token_pop_mode = AUTO_POP;
qm_port->owed_tokens = 0;
qm_port->issued_releases = 0;
@ -2572,6 +2620,30 @@ dlb_event_build_hcws(struct dlb_port *qm_port,
}
}
/* Build a token-pop QE in slot idx of the port's QE staging area
 * (qm_port->qe4) that accounts for all tokens currently owed on the CQ,
 * and clear the owed-token count.
 *
 * When the reserved token scheme is in use, owed tokens are first applied
 * against cq_rsvd_token_deficit. If the deficit absorbs all of them, the
 * slot is left untouched and no pop command is written.
 *
 * The QE's tokens field is written as the token count minus one.
 */
static inline void
dlb_construct_token_pop_qe(struct dlb_port *qm_port, int idx)
{
	struct dlb_cq_pop_qe *pop_qe = (void *)qm_port->qe4;
	int pending = qm_port->owed_tokens;

	/* Every path below settles the full owed count. */
	qm_port->owed_tokens = 0;

	if (qm_port->use_rsvd_token_scheme) {
		/* Pay down the reserved-token deficit first; if nothing is
		 * left over there are no (unreserved) tokens to pop.
		 */
		if (pending <= qm_port->cq_rsvd_token_deficit) {
			qm_port->cq_rsvd_token_deficit -= pending;
			return;
		}

		pending -= qm_port->cq_rsvd_token_deficit;
		qm_port->cq_rsvd_token_deficit = 0;
	}

	pop_qe += idx;
	pop_qe->cmd_byte = DLB_POP_CMD_BYTE;
	pop_qe->tokens = pending - 1;
}
static __rte_always_inline void
dlb_pp_write(struct dlb_enqueue_qe *qe4,
struct process_local_port_data *port_data)
@ -2638,7 +2710,8 @@ dlb_consume_qe_immediate(struct dlb_port *qm_port, int num)
static inline uint16_t
__dlb_event_enqueue_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
uint16_t num,
bool use_delayed)
{
struct dlb_eventdev_port *ev_port = event_port;
struct dlb_port *qm_port = &ev_port->qm_port;
@ -2666,6 +2739,35 @@ __dlb_event_enqueue_burst(void *event_port,
for (; j < DLB_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
const struct rte_event *ev = &events[i + j];
int16_t thresh = qm_port->token_pop_thresh;
if (use_delayed &&
qm_port->token_pop_mode == DELAYED_POP &&
(ev->op == RTE_EVENT_OP_FORWARD ||
ev->op == RTE_EVENT_OP_RELEASE) &&
qm_port->issued_releases >= thresh - 1) {
/* Insert the token pop QE and break out. This
* may result in a partial HCW, but that is
* simpler than supporting arbitrary QE
* insertion.
*/
dlb_construct_token_pop_qe(qm_port, j);
/* Reset the releases for the next QE batch */
qm_port->issued_releases -= thresh;
/* When using delayed token pop mode, the
* initial token threshold is the full CQ
* depth. After the first token pop, we need to
* reset it to the dequeue_depth.
*/
qm_port->token_pop_thresh =
qm_port->dequeue_depth;
pop_offs = 1;
j++;
break;
}
if (dlb_event_enqueue_prep(ev_port, qm_port, ev,
port_data, &sched_types[j],
@ -2701,14 +2803,29 @@ dlb_event_enqueue_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num);
return __dlb_event_enqueue_burst(event_port, events, num, false);
}
/* Burst enqueue entry point with delayed token pop handling enabled.
 * Forwards to __dlb_event_enqueue_burst() with use_delayed set to true and
 * returns its result unchanged. Installed as the device's enqueue_burst
 * callback by dlb_hw_create_ldb_port() for DELAYED_POP ports.
 */
static inline uint16_t
dlb_event_enqueue_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static inline uint16_t
dlb_event_enqueue(void *event_port,
const struct rte_event events[])
{
return __dlb_event_enqueue_burst(event_port, events, 1);
return __dlb_event_enqueue_burst(event_port, events, 1, false);
}
/* Single-event enqueue entry point with delayed token pop handling enabled.
 * Forwards to __dlb_event_enqueue_burst() with a burst size of 1 and
 * use_delayed set to true, and returns its result unchanged. Installed as
 * the device's enqueue callback by dlb_hw_create_ldb_port() for DELAYED_POP
 * ports.
 */
static inline uint16_t
dlb_event_enqueue_delayed(void *event_port,
const struct rte_event events[])
{
return __dlb_event_enqueue_burst(event_port, events, 1, true);
}
static uint16_t
@ -2716,7 +2833,15 @@ dlb_event_enqueue_new_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num);
return __dlb_event_enqueue_burst(event_port, events, num, false);
}
/* enqueue_new_burst entry point with delayed token pop handling enabled.
 * Forwards to __dlb_event_enqueue_burst() with use_delayed set to true and
 * returns its result unchanged. Installed as the device's enqueue_new_burst
 * callback by dlb_hw_create_ldb_port() for DELAYED_POP ports.
 */
static uint16_t
dlb_event_enqueue_new_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static uint16_t
@ -2724,7 +2849,15 @@ dlb_event_enqueue_forward_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num);
return __dlb_event_enqueue_burst(event_port, events, num, false);
}
/* enqueue_forward_burst entry point with delayed token pop handling enabled.
 * Forwards to __dlb_event_enqueue_burst() with use_delayed set to true and
 * returns its result unchanged. Installed as the device's
 * enqueue_forward_burst callback by dlb_hw_create_ldb_port() for DELAYED_POP
 * ports.
 */
static uint16_t
dlb_event_enqueue_forward_burst_delayed(void *event_port,
const struct rte_event events[],
uint16_t num)
{
return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static __rte_always_inline int
@ -3124,7 +3257,8 @@ dlb_hw_dequeue(struct dlb_eventdev *dlb,
qm_port->owed_tokens += num;
dlb_consume_qe_immediate(qm_port, num);
if (num && qm_port->token_pop_mode == AUTO_POP)
dlb_consume_qe_immediate(qm_port, num);
ev_port->outstanding_releases += num;
@ -3249,7 +3383,8 @@ dlb_hw_dequeue_sparse(struct dlb_eventdev *dlb,
qm_port->owed_tokens += num;
dlb_consume_qe_immediate(qm_port, num);
if (num && qm_port->token_pop_mode == AUTO_POP)
dlb_consume_qe_immediate(qm_port, num);
ev_port->outstanding_releases += num;
@ -3293,6 +3428,28 @@ dlb_event_release(struct dlb_eventdev *dlb, uint8_t port_id, int n)
qm_port->qe4[3].cmd_byte = 0;
for (; j < DLB_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
int16_t thresh = qm_port->token_pop_thresh;
if (qm_port->token_pop_mode == DELAYED_POP &&
qm_port->issued_releases >= thresh - 1) {
/* Insert the token pop QE */
dlb_construct_token_pop_qe(qm_port, j);
/* Reset the releases for the next QE batch */
qm_port->issued_releases -= thresh;
/* When using delayed token pop mode, the
* initial token threshold is the full CQ
* depth. After the first token pop, we need to
* reset it to the dequeue_depth.
*/
qm_port->token_pop_thresh =
qm_port->dequeue_depth;
pop_offs = 1;
j++;
break;
}
qm_port->qe4[j].cmd_byte = DLB_COMP_CMD_BYTE;
qm_port->issued_releases++;
@ -3325,6 +3482,7 @@ dlb_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
uint64_t wait)
{
struct dlb_eventdev_port *ev_port = event_port;
struct dlb_port *qm_port = &ev_port->qm_port;
struct dlb_eventdev *dlb = ev_port->dlb;
uint16_t cnt;
int ret;
@ -3344,6 +3502,10 @@ dlb_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
DLB_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
}
if (qm_port->token_pop_mode == DEFERRED_POP &&
qm_port->owed_tokens)
dlb_consume_qe_immediate(qm_port, qm_port->owed_tokens);
cnt = dlb_hw_dequeue(dlb, ev_port, ev, num, wait);
DLB_INC_STAT(ev_port->stats.traffic.total_polls, 1);
@ -3362,6 +3524,7 @@ dlb_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
uint16_t num, uint64_t wait)
{
struct dlb_eventdev_port *ev_port = event_port;
struct dlb_port *qm_port = &ev_port->qm_port;
struct dlb_eventdev *dlb = ev_port->dlb;
uint16_t cnt;
int ret;
@ -3381,6 +3544,10 @@ dlb_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
DLB_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
}
if (qm_port->token_pop_mode == DEFERRED_POP &&
qm_port->owed_tokens)
dlb_consume_qe_immediate(qm_port, qm_port->owed_tokens);
cnt = dlb_hw_dequeue_sparse(dlb, ev_port, ev, num, wait);
DLB_INC_STAT(ev_port->stats.traffic.total_polls, 1);
@ -3687,7 +3854,7 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev,
struct dlb_devargs *dlb_args)
{
struct dlb_eventdev *dlb;
int err;
int err, i;
dlb = dev->data->dev_private;
@ -3736,6 +3903,10 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev,
return err;
}
/* Initialize each port's token pop mode */
for (i = 0; i < DLB_MAX_NUM_PORTS; i++)
dlb->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
rte_spinlock_init(&dlb->qm_instance.resource_lock);
dlb_iface_low_level_io_init(dlb);

View File

@ -16,6 +16,7 @@
#include "dlb_user.h"
#include "dlb_log.h"
#include "rte_pmd_dlb.h"
#ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
#define DLB_INC_STAT(_stat, _incr_val) ((_stat) += _incr_val)
@ -262,6 +263,7 @@ struct dlb_port {
bool gen_bit;
uint16_t dir_credits;
uint32_t dequeue_depth;
enum dlb_token_pop_mode token_pop_mode;
int pp_mmio_base;
uint16_t cached_ldb_credits;
uint16_t ldb_pushcount_at_credit_expiry;
@ -273,6 +275,7 @@ struct dlb_port {
uint8_t cq_rsvd_token_deficit;
uint16_t owed_tokens;
int16_t issued_releases;
int16_t token_pop_thresh;
int cq_depth;
uint16_t cq_idx;
uint16_t cq_idx_unmasked;

View File

@ -12,9 +12,10 @@ sources = files('dlb.c',
'dlb_xstats.c',
'pf/dlb_main.c',
'pf/dlb_pf.c',
'pf/base/dlb_resource.c'
'pf/base/dlb_resource.c',
'rte_pmd_dlb.c',
)
headers = files()
headers = files('rte_pmd_dlb.h')
deps += ['mbuf', 'mempool', 'ring', 'pci', 'bus_pci']

View File

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
#include "rte_pmd_dlb.h"
#include "dlb_priv.h"
#include "dlb_inline_fns.h"
/* Record the requested token pop mode for one event port.
 *
 * The mode is only stored in the port's qm_port state here. Returns 0 on
 * success, or -EINVAL when dev_id, port_id or mode is out of range, when the
 * device is not configured or is not in the stopped state, or when the port
 * has already been set up.
 */
int
rte_pmd_dlb_set_token_pop_mode(uint8_t dev_id,
			       uint8_t port_id,
			       enum dlb_token_pop_mode mode)
{
	struct rte_eventdev *evdev;
	struct dlb_eventdev *priv;

	RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);

	evdev = &rte_eventdevs[dev_id];
	priv = dlb_pmd_priv(evdev);

	if (mode >= NUM_TOKEN_POP_MODES)
		return -EINVAL;

	/* The event device must be configured ... */
	if (!priv->configured)
		return -EINVAL;

	/* ... but not yet started. */
	if (priv->run_state != DLB_RUN_STATE_STOPPED)
		return -EINVAL;

	/* The port must exist ... */
	if (port_id >= priv->num_ports)
		return -EINVAL;

	/* ... and the mode must be chosen before the port is set up. */
	if (priv->ev_ports[port_id].setup_done)
		return -EINVAL;

	priv->ev_ports[port_id].qm_port.token_pop_mode = mode;

	return 0;
}

View File

@ -0,0 +1,77 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019-2020 Intel Corporation
*/
/*!
* @file rte_pmd_dlb.h
*
* @brief DLB PMD-specific functions
*
*/
#ifndef _RTE_PMD_DLB_H_
#define _RTE_PMD_DLB_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
/**
 * @warning
 * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
 *
 * Token pop modes that can be selected for a DLB port.
 */
enum dlb_token_pop_mode {
	/** Pop the CQ tokens immediately after dequeueing. */
	AUTO_POP = 0,
	/** Pop CQ tokens after (dequeue_depth - 1) events are released.
	 * Supported on load-balanced ports only.
	 */
	DELAYED_POP,
	/** Pop the CQ tokens during the next dequeue operation. */
	DEFERRED_POP,
	/** Number of token pop modes; must remain the last enumerator. */
	NUM_TOKEN_POP_MODES
};
/*!
* @warning
* @b EXPERIMENTAL: this API may change, or be removed, without prior notice
*
* Configure the token pop mode for a DLB port. By default, all ports use
* AUTO_POP. This function must be called before calling rte_event_port_setup()
* for the port, but after calling rte_event_dev_configure().
*
* @note
* The defer_sched vdev arg, which configures all load-balanced ports with
* dequeue_depth == 1 for DEFERRED_POP mode, takes precedence over this
* function.
*
* @param dev_id
* The identifier of the event device.
* @param port_id
* The identifier of the event port.
* @param mode
* The token pop mode.
*
* @return
* - 0: Success
* - -EINVAL: Invalid dev_id, port_id, or mode
* - -EINVAL: The DLB is not configured, is already running, or the port is
* already set up
*/
/* NOTE(review): this header includes only <stdint.h>, which does not define
* __rte_experimental; presumably the includer must pull in the DPDK header
* that defines it first - confirm, or include it here.
*/
__rte_experimental
int
rte_pmd_dlb_set_token_pop_mode(uint8_t dev_id,
uint8_t port_id,
enum dlb_token_pop_mode mode);
#ifdef __cplusplus
}
#endif
#endif /* _RTE_PMD_DLB_H_ */

View File

@ -1,3 +1,9 @@
DPDK_21 {
local: *;
};
EXPERIMENTAL {
global:
rte_pmd_dlb_set_token_pop_mode;
};