99a2dd955f
There is no reason for the DPDK libraries to all have 'librte_' prefix on the directory names. This prefix makes the directory names longer and also makes it awkward to add features referring to individual libraries in the build - should the lib names be specified with or without the prefix. Therefore, we can just remove the library prefix and use the library's unique name as the directory name, i.e. 'eal' rather than 'librte_eal' Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
178 lines
5.4 KiB
C
178 lines
5.4 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(c) 2017 Intel Corporation
|
|
*/
|
|
|
|
#ifndef _DIST_PRIV_H_
|
|
#define _DIST_PRIV_H_
|
|
|
|
/**
|
|
* @file
|
|
* RTE distributor
|
|
*
|
|
* The distributor is a component which is designed to pass packets
|
|
* one-at-a-time to workers, with dynamic load balancing.
|
|
*/
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#define NO_FLAGS 0
|
|
#define RTE_DISTRIB_PREFIX "DT_"
|
|
|
|
/*
|
|
* We will use the bottom four bits of pointer for flags, shifting out
|
|
* the top four bits to make room (since a 64-bit pointer actually only uses
|
|
* 48 bits). An arithmetic-right-shift will then appropriately restore the
|
|
* original pointer value with proper sign extension into the top bits.
|
|
*/
|
|
#define RTE_DISTRIB_FLAG_BITS 4
|
|
#define RTE_DISTRIB_FLAGS_MASK (0x0F)
|
|
#define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */
|
|
#define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */
|
|
#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */
|
|
#define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */
|
|
|
|
#define RTE_DISTRIB_BACKLOG_SIZE 8
|
|
#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1)
|
|
|
|
#define RTE_DISTRIB_MAX_RETURNS 128
|
|
#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1)
|
|
|
|
/**
|
|
* Maximum number of workers allowed.
|
|
* Be aware of increasing the limit, because it is limited by how we track
|
|
* in-flight tags. See in_flight_bitmask and rte_distributor_process
|
|
*/
|
|
#define RTE_DISTRIB_MAX_WORKERS 64
|
|
|
|
#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */
|
|
|
|
/**
|
|
* Buffer structure used to pass the pointer data between cores. This is cache
|
|
* line aligned, but to improve performance and prevent adjacent cache-line
|
|
* prefetches of buffers for other workers, e.g. when worker 1's buffer is on
|
|
* the next cache line to worker 0, we pad this out to three cache lines.
|
|
* Only 64-bits of the memory is actually used though.
|
|
*/
|
|
union rte_distributor_buffer_single {
|
|
volatile int64_t bufptr64;
|
|
char pad[RTE_CACHE_LINE_SIZE*3];
|
|
} __rte_cache_aligned;
|
|
|
|
/*
|
|
* Transfer up to 8 mbufs at a time to/from workers, and
|
|
* flow matching algorithm optimized for 8 flow IDs at a time
|
|
*/
|
|
#define RTE_DIST_BURST_SIZE 8
|
|
|
|
struct rte_distributor_backlog {
|
|
unsigned int start;
|
|
unsigned int count;
|
|
int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned;
|
|
uint16_t *tags; /* will point to second cacheline of inflights */
|
|
} __rte_cache_aligned;
|
|
|
|
|
|
struct rte_distributor_returned_pkts {
|
|
unsigned int start;
|
|
unsigned int count;
|
|
struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS];
|
|
};
|
|
|
|
struct rte_distributor_single {
|
|
TAILQ_ENTRY(rte_distributor_single) next; /**< Next in list. */
|
|
|
|
char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */
|
|
unsigned int num_workers; /**< Number of workers polling */
|
|
|
|
uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
|
|
/**< Tracks the tag being processed per core */
|
|
uint64_t in_flight_bitmask;
|
|
/**< on/off bits for in-flight tags.
|
|
* Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
|
|
* the bitmask has to expand.
|
|
*/
|
|
|
|
struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS];
|
|
|
|
union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS];
|
|
|
|
struct rte_distributor_returned_pkts returns;
|
|
};
|
|
|
|
/* All different signature compare functions */
|
|
enum rte_distributor_match_function {
|
|
RTE_DIST_MATCH_SCALAR = 0,
|
|
RTE_DIST_MATCH_VECTOR,
|
|
RTE_DIST_NUM_MATCH_FNS
|
|
};
|
|
|
|
/**
|
|
* Buffer structure used to pass the pointer data between cores. This is cache
|
|
* line aligned, but to improve performance and prevent adjacent cache-line
|
|
* prefetches of buffers for other workers, e.g. when worker 1's buffer is on
|
|
* the next cache line to worker 0, we pad this out to two cache lines.
|
|
* We can pass up to 8 mbufs at a time in one cacheline.
|
|
* There is a separate cacheline for returns in the burst API.
|
|
*/
|
|
struct rte_distributor_buffer {
|
|
volatile int64_t bufptr64[RTE_DIST_BURST_SIZE]
|
|
__rte_cache_aligned; /* <= outgoing to worker */
|
|
|
|
int64_t pad1 __rte_cache_aligned; /* <= one cache line */
|
|
|
|
volatile int64_t retptr64[RTE_DIST_BURST_SIZE]
|
|
__rte_cache_aligned; /* <= incoming from worker */
|
|
|
|
int64_t pad2 __rte_cache_aligned; /* <= one cache line */
|
|
|
|
int count __rte_cache_aligned; /* <= number of current mbufs */
|
|
};
|
|
|
|
struct rte_distributor {
|
|
TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */
|
|
|
|
char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */
|
|
unsigned int num_workers; /**< Number of workers polling */
|
|
unsigned int alg_type; /**< Number of alg types */
|
|
|
|
/**>
|
|
* First cache line in the this array are the tags inflight
|
|
* on the worker core. Second cache line are the backlog
|
|
* that are going to go to the worker core.
|
|
*/
|
|
uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2]
|
|
__rte_cache_aligned;
|
|
|
|
struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]
|
|
__rte_cache_aligned;
|
|
|
|
struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS];
|
|
|
|
struct rte_distributor_returned_pkts returns;
|
|
|
|
enum rte_distributor_match_function dist_match_fn;
|
|
|
|
struct rte_distributor_single *d_single;
|
|
|
|
uint8_t active[RTE_DISTRIB_MAX_WORKERS];
|
|
uint8_t activesum;
|
|
};
|
|
|
|
void
|
|
find_match_scalar(struct rte_distributor *d,
|
|
uint16_t *data_ptr,
|
|
uint16_t *output_ptr);
|
|
|
|
void
|
|
find_match_vec(struct rte_distributor *d,
|
|
uint16_t *data_ptr,
|
|
uint16_t *output_ptr);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _DIST_PRIV_H_ */
|