numam-dpdk/drivers/event/dsw/dsw_evdev.h


/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Ericsson AB
*/
#ifndef _DSW_EVDEV_H_
#define _DSW_EVDEV_H_
#include <eventdev_pmd.h>
#include <rte_event_ring.h>
#include <rte_eventdev.h>
#define DSW_PMD_NAME RTE_STR(event_dsw)
#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)
#define DSW_MAX_QUEUES (16)
#define DSW_MAX_EVENTS (16384)
/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * flow ids of several large flows hash to the same DSW-level flow.
 * Such collisions will limit parallelism and thus the number of cores
 * that may be utilized. However, configuring a large number of DSW
 * flows might potentially, depending on traffic and actual
 * application flow id value range, result in each such DSW-level flow
 * being very small. The effect of migrating such flows will be small,
 * in terms of the amount of processing load redistributed. This will
 * in turn reduce the load balancing speed, since the flow migration
 * rate has an upper limit. Code changes are required to allow > 32k
 * DSW-level flows.
 */
#define DSW_MAX_FLOWS_BITS (13)
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
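
/*
 * Illustration only (not part of the original header): a minimal
 * sketch of how a 24-bit eventdev flow id could be folded into a
 * DSW-level flow hash of DSW_MAX_FLOWS_BITS bits, by XOR-folding.
 * The driver's actual hash function lives in the implementation
 * files and may differ; the name below is hypothetical.
 */
static inline uint16_t
dsw_flow_hash_sketch(uint32_t flow_id)
{
	uint16_t hash = 0;
	uint16_t offset;

	/* Fold all 24 flow id bits into DSW_MAX_FLOWS_BITS bits. */
	for (offset = 0; offset < 24; offset += DSW_MAX_FLOWS_BITS)
		hash ^= (flow_id >> offset) & DSW_MAX_FLOWS_MASK;

	return hash;
}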
/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a
 * particular port, the likelihood of events being scheduled to this
 * port is reduced, and thus a kind of statistical load balancing is
 * achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)
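
/*
 * Illustration only: a sketch of how a port might assign a
 * pseudo-random flow id to an event on a RTE_SCHED_TYPE_PARALLEL
 * queue, by cycling a per-port counter over the parallel flow space.
 * The counter parameter stands in for the port's
 * 'next_parallel_flow_id' field declared further down; the function
 * name is hypothetical.
 */
static inline uint16_t
dsw_parallel_flow_sketch(uint16_t *next_parallel_flow_id)
{
	return (*next_parallel_flow_id)++ % DSW_PARALLEL_FLOWS;
}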
/* 'Background tasks' are polling the control rings for
 * migration-related messages, or flushing the output buffer (so
 * buffered events don't linger too long). This value shouldn't be
 * too low, since the system then won't benefit from the 'batching'
 * effects of the output buffer, and shouldn't be too high, since
 * that will make buffered events linger too long in case the port
 * goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
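
/*
 * Illustration only: a sketch of how a port-local operation counter
 * could gate the background task, using the threshold above. Returns
 * 1 when the background task is due; the driver's actual scheduling
 * policy may differ, and the function name is hypothetical.
 */
static inline int
dsw_bg_task_due_sketch(uint16_t *ops_since_bg_task, uint16_t num_ops)
{
	*ops_since_bg_task += num_ops;

	if (*ops_since_bg_task < DSW_MAX_PORT_OPS_PER_BG_TASK)
		return 0;

	*ops_since_bg_task = 0;
	return 1;
}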
/* Avoid making small 'loans' from the central in-flight event credit
* pool, to improve efficiency.
*/
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
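
/*
 * Illustration only: a sketch of borrowing credits from the central
 * in-flight pool in chunks of at least DSW_MIN_CREDIT_LOAN. In the
 * actual driver the pool counter is shared between ports and updated
 * atomically; that is omitted here for brevity, and the function name
 * is hypothetical. Returns 1 on success, 0 if the pool is exhausted.
 */
static inline int
dsw_acquire_credits_sketch(int32_t *credits_on_loan, int32_t max_inflight,
			   int32_t *port_credits, int32_t required)
{
	int32_t loan;

	if (*port_credits >= required)
		return 1;

	/* Take a full-sized loan, to avoid frequent small loans from
	 * the shared pool.
	 */
	loan = required > DSW_MIN_CREDIT_LOAN ? required :
		DSW_MIN_CREDIT_LOAN;

	if (*credits_on_loan + loan > max_inflight)
		return 0;

	*credits_on_loan += loan;
	*port_credits += loan;

	return 1;
}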
/* The rings are dimensioned so that all in-flight events can reside
* on any one of the port rings, to avoid the trouble of having to
* care about the case where there's no room on the destination port's
* input ring.
*/
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
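
/* Worked example (illustration only): with DSW_MAX_LOAD == INT16_MAX
 * == 32767, DSW_LOAD_FROM_PERCENT(70) evaluates to (70*32767)/100 ==
 * 22936, and DSW_LOAD_TO_PERCENT(22936) truncates back down to 69,
 * since both conversions use integer arithmetic.
 */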
/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, being migrated too quickly to a lightly loaded port -
 * in particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)
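
/*
 * Illustration only: a sketch of a weighted-average port load update
 * using DSW_OLD_LOAD_WEIGHT. With a weight of 1, the previous load
 * estimate and the newly measured period load contribute equally.
 * The driver's actual measurement logic lives in the implementation
 * files; the function name is hypothetical.
 */
static inline int16_t
dsw_update_load_sketch(int16_t old_load, int16_t period_load)
{
	return (old_load * DSW_OLD_LOAD_WEIGHT + period_load) /
		(DSW_OLD_LOAD_WEIGHT + 1);
}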
/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent on the processing
 * of an event burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))
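
/*
 * Illustration only: a sketch of how the load thresholds above could
 * gate a flow migration decision. The driver's actual candidate
 * selection is more involved; the function name is hypothetical.
 */
static inline int
dsw_migration_worthwhile_sketch(int16_t source_load, int16_t target_load)
{
	/* The source port must be busy enough for migration to pay off. */
	if (source_load < DSW_MIN_SOURCE_LOAD_FOR_MIGRATION)
		return 0;

	/* Don't pile more flows onto an already-loaded target. */
	if (target_load > DSW_MAX_TARGET_LOAD_FOR_MIGRATION)
		return 0;

	/* Require an imbalance large enough to be worth the cost. */
	return source_load - target_load >= DSW_REBALANCE_THRESHOLD;
}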
#define DSW_MAX_EVENTS_RECORDED (128)
#define DSW_MAX_FLOWS_PER_MIGRATION (8)
/* Only one outstanding migration per port is allowed */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)
/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
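
/* Example: with DSW_MAX_PORTS == 64, each of the up to 63 peer ports
 * may have four messages outstanding (pause request/confirm plus
 * unpause request/confirm), giving a ring size of 63*4 == 252 slots.
 */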
/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and so that such
 * bursts of different flow ids, but on the same queue, come
 * consecutively. All this in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */
/* #define DSW_SORT_DEQUEUED */
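
/*
 * Illustration only: a qsort()-style comparison function that would
 * produce the ordering described above, grouping dequeued events
 * first by queue id and then by DSW-level flow hash. The driver's
 * actual sort implementation may differ; the function name is
 * hypothetical.
 */
static inline int
dsw_dequeued_cmp_sketch(const void *a, const void *b)
{
	const struct rte_event *ev_a = a;
	const struct rte_event *ev_b = b;

	if (ev_a->queue_id != ev_b->queue_id)
		return (int)ev_a->queue_id - (int)ev_b->queue_id;

	return (int)(ev_a->flow_id & DSW_MAX_FLOWS_MASK) -
		(int)(ev_b->flow_id & DSW_MAX_FLOWS_MASK);
}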
struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};
enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_UNPAUSING
};
struct dsw_port {
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	int32_t inflight_credits;

	int32_t new_event_threshold;

	uint16_t pending_releases;

	uint16_t next_parallel_flow_id;

	uint16_t ops_since_bg_task;

	/* Most recent 'background' processing. */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_emigration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t emigration_start;
	uint64_t emigrations;
	uint64_t emigration_latency;

	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
	struct dsw_queue_flow
		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
	uint8_t emigration_targets_len;
	uint8_t cfm_cnt;

	uint64_t immigrations;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case all in-flight events can be
	 * lying around paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];
	uint16_t emigrating_events_len;
	/* Buffer for not-yet-processed events pertaining to a flow
	 * emigrating from this port. These events will be forwarded
	 * to the target port.
	 */
	struct rte_event emigrating_events[DSW_MAX_EVENTS];

	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	uint64_t enqueue_calls;
	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	uint64_t dequeue_calls;
	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read up from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	struct rte_event_ring *in_ring __rte_cache_aligned;

	struct rte_ring *ctl_in_ring __rte_cache_aligned;

	/* Estimate of current port load. */
	int16_t load __rte_cache_aligned;
	/* Estimate of flows currently migrating to this port. */
	int32_t immigration_load __rte_cache_aligned;
} __rte_cache_aligned;
struct dsw_queue {
	uint8_t schedule_type;
	uint8_t serving_ports[DSW_MAX_PORTS];
	uint16_t num_serving_ports;

	uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
};
struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	int32_t max_inflight;

	int32_t credits_on_loan __rte_cache_aligned;
};
#define DSW_CTL_PAUS_REQ (0)
#define DSW_CTL_UNPAUS_REQ (1)
#define DSW_CTL_CFM (2)
struct dsw_ctl_msg {
	uint8_t type;
	uint8_t originating_port_id;
	uint8_t qfs_len;
	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
} __rte_aligned(4);
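
/*
 * Illustration only: a sketch of how a port could broadcast a control
 * message to every other port's control ring, using the message
 * layout above. It assumes rte_ring_enqueue_elem() (from
 * <rte_ring_elem.h>) is visible via the includes at the top; the
 * function name is hypothetical. Per the DSW_CTL_IN_RING_SIZE
 * dimensioning above, error handling on enqueue is omitted.
 */
static inline void
dsw_ctl_broadcast_sketch(struct dsw_evdev *dsw, struct dsw_port *source,
			 struct dsw_ctl_msg *msg)
{
	uint16_t port_id;

	for (port_id = 0; port_id < dsw->num_ports; port_id++)
		if (port_id != source->id)
			rte_ring_enqueue_elem(dsw->ports[port_id].ctl_in_ring,
					      msg, sizeof(*msg));
}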
uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
				 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
				     const struct rte_event events[],
				     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
					 const struct rte_event events[],
					 uint16_t events_len);

uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
void dsw_event_maintain(void *port, int op);
int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 unsigned int *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const unsigned int ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, unsigned int *id);
static inline struct dsw_evdev *
dsw_pmd_priv(const struct rte_eventdev *eventdev)
{
	return eventdev->data->dev_private;
}
#define DSW_LOG_DP(level, fmt, args...) \
	RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt, \
		   DSW_PMD_NAME, __func__, __LINE__, ## args)

#define DSW_LOG_DP_PORT(level, port_id, fmt, args...) \
	DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
#endif