event/dlb2: optimize producer port probing
For best performance, applications running on certain cores should use the DLB device locally available on the same tile along with other resources. To allocate optimal resources, probing is done for each producer port (PP) for a given CPU and the best performing ports are allocated to producers. The CPU used for probing is either the first core of producer coremask (if present) or the second core of EAL coremask. This will be extended later to probe for all CPUs in the producer coremask or EAL coremask. Producer coremask can be passed along with the BDF of the DLB devices. "-a xx:y.z,producer_coremask=<core_mask>" Applications also need to pass RTE_EVENT_PORT_CFG_HINT_PRODUCER during rte_event_port_setup() for producer ports for optimal port allocation. For optimal load balancing ports that map to one or more QIDs in common should not be in numerical sequence. The port->QID mapping is application dependent, but the driver interleaves port IDs as much as possible to reduce the likelihood of sequential ports mapping to the same QID(s). Hence, DLB uses an initial allocation of Port IDs to maximize the average distance between an ID and its immediate neighbors. Using the initialport allocation option can be passed through devarg "default_port_allocation=y(or Y)". When events are dropped by workers or consumers that use LDB ports, completions are sent which are just ENQs and may impact the latency. To address this, probing is done for LDB ports as well. Probing is done on ports per 'cos'. When default cos is used, ports will be allocated from best ports from the best 'cos', else from best ports of the specific cos. Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
This commit is contained in:
parent
edbb4c09c5
commit
8d1d9070bb
@ -414,3 +414,39 @@ Note that the weight may not exceed the maximum CQ depth.
|
|||||||
--allow ea:00.0,cq_weight=all:<weight>
|
--allow ea:00.0,cq_weight=all:<weight>
|
||||||
--allow ea:00.0,cq_weight=qidA-qidB:<weight>
|
--allow ea:00.0,cq_weight=qidA-qidB:<weight>
|
||||||
--allow ea:00.0,cq_weight=qid:<weight>
|
--allow ea:00.0,cq_weight=qid:<weight>
|
||||||
|
|
||||||
|
Producer Coremask
|
||||||
|
~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
For best performance, applications running on certain cores should use
|
||||||
|
the DLB device locally available on the same tile along with other
|
||||||
|
resources. To allocate optimal resources, probing is done for each
|
||||||
|
producer port (PP) for a given CPU and the best performing ports are
|
||||||
|
allocated to producers. The CPU used for probing is either the first
|
||||||
|
core of producer coremask (if present) or the second core of EAL
|
||||||
|
coremask. This will be extended later to probe for all CPUs in the
|
||||||
|
producer coremask or EAL coremask. Producer coremask can be passed
|
||||||
|
along with the BDF of the DLB devices.
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
-a xx:y.z,producer_coremask=<core_mask>
|
||||||
|
|
||||||
|
Default LDB Port Allocation
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
For optimal load balancing, ports that map to one or more QIDs in common
|
||||||
|
should not be in numerical sequence. The port->QID mapping is application
|
||||||
|
dependent, but the driver interleaves port IDs as much as possible to
|
||||||
|
reduce the likelihood of sequential ports mapping to the same QID(s).
|
||||||
|
|
||||||
|
Hence, DLB uses an initial allocation of Port IDs to maximize the
|
||||||
|
average distance between an ID and its immediate neighbors (i.e. the
|
||||||
|
distance from 1 to 0 and to 2, the distance from 2 to 1 and to 3, etc.).
|
||||||
|
Initial port allocation option can be passed through devarg. If y (or Y)
|
||||||
|
initial port allocation will be used, otherwise initial port allocation
|
||||||
|
won't be used.
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
--allow ea:00.0,default_port_allocation=<y/Y>
|
||||||
|
@ -293,6 +293,23 @@ dlb2_string_to_int(int *result, const char *str)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_producer_coremask(const char *key __rte_unused,
|
||||||
|
const char *value,
|
||||||
|
void *opaque)
|
||||||
|
{
|
||||||
|
const char **mask_str = opaque;
|
||||||
|
|
||||||
|
if (value == NULL || opaque == NULL) {
|
||||||
|
DLB2_LOG_ERR("NULL pointer\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
*mask_str = value;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
|
set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
|
||||||
{
|
{
|
||||||
@ -617,6 +634,26 @@ set_vector_opts_enab(const char *key __rte_unused,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_default_ldb_port_allocation(const char *key __rte_unused,
|
||||||
|
const char *value,
|
||||||
|
void *opaque)
|
||||||
|
{
|
||||||
|
bool *default_ldb_port_allocation = opaque;
|
||||||
|
|
||||||
|
if (value == NULL || opaque == NULL) {
|
||||||
|
DLB2_LOG_ERR("NULL pointer\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((*value == 'y') || (*value == 'Y'))
|
||||||
|
*default_ldb_port_allocation = true;
|
||||||
|
else
|
||||||
|
*default_ldb_port_allocation = false;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
set_qid_depth_thresh(const char *key __rte_unused,
|
set_qid_depth_thresh(const char *key __rte_unused,
|
||||||
const char *value,
|
const char *value,
|
||||||
@ -1785,6 +1822,9 @@ dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
|
|||||||
} else
|
} else
|
||||||
credit_high_watermark = enqueue_depth;
|
credit_high_watermark = enqueue_depth;
|
||||||
|
|
||||||
|
if (ev_port->conf.event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER)
|
||||||
|
cfg.is_producer = 1;
|
||||||
|
|
||||||
/* Per QM values */
|
/* Per QM values */
|
||||||
|
|
||||||
ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
|
ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
|
||||||
@ -1979,6 +2019,10 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,
|
|||||||
}
|
}
|
||||||
ev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;
|
ev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;
|
||||||
|
|
||||||
|
/* Save off port config for reconfig */
|
||||||
|
ev_port->conf = *port_conf;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create port
|
* Create port
|
||||||
*/
|
*/
|
||||||
@ -2005,9 +2049,6 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Save off port config for reconfig */
|
|
||||||
ev_port->conf = *port_conf;
|
|
||||||
|
|
||||||
ev_port->id = ev_port_id;
|
ev_port->id = ev_port_id;
|
||||||
ev_port->enq_configured = true;
|
ev_port->enq_configured = true;
|
||||||
ev_port->setup_done = true;
|
ev_port->setup_done = true;
|
||||||
@ -4700,6 +4741,8 @@ dlb2_parse_params(const char *params,
|
|||||||
DLB2_CQ_WEIGHT,
|
DLB2_CQ_WEIGHT,
|
||||||
DLB2_PORT_COS,
|
DLB2_PORT_COS,
|
||||||
DLB2_COS_BW,
|
DLB2_COS_BW,
|
||||||
|
DLB2_PRODUCER_COREMASK,
|
||||||
|
DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,
|
||||||
NULL };
|
NULL };
|
||||||
|
|
||||||
if (params != NULL && params[0] != '\0') {
|
if (params != NULL && params[0] != '\0') {
|
||||||
@ -4881,6 +4924,29 @@ dlb2_parse_params(const char *params,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ret = rte_kvargs_process(kvlist,
|
||||||
|
DLB2_PRODUCER_COREMASK,
|
||||||
|
set_producer_coremask,
|
||||||
|
&dlb2_args->producer_coremask);
|
||||||
|
if (ret != 0) {
|
||||||
|
DLB2_LOG_ERR(
|
||||||
|
"%s: Error parsing producer coremask",
|
||||||
|
name);
|
||||||
|
rte_kvargs_free(kvlist);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = rte_kvargs_process(kvlist,
|
||||||
|
DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,
|
||||||
|
set_default_ldb_port_allocation,
|
||||||
|
&dlb2_args->default_ldb_port_allocation);
|
||||||
|
if (ret != 0) {
|
||||||
|
DLB2_LOG_ERR("%s: Error parsing ldb default port allocation arg",
|
||||||
|
name);
|
||||||
|
rte_kvargs_free(kvlist);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
rte_kvargs_free(kvlist);
|
rte_kvargs_free(kvlist);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,8 @@
|
|||||||
#define DLB2_CQ_WEIGHT "cq_weight"
|
#define DLB2_CQ_WEIGHT "cq_weight"
|
||||||
#define DLB2_PORT_COS "port_cos"
|
#define DLB2_PORT_COS "port_cos"
|
||||||
#define DLB2_COS_BW "cos_bw"
|
#define DLB2_COS_BW "cos_bw"
|
||||||
|
#define DLB2_PRODUCER_COREMASK "producer_coremask"
|
||||||
|
#define DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG "default_port_allocation"
|
||||||
|
|
||||||
/* Begin HW related defines and structs */
|
/* Begin HW related defines and structs */
|
||||||
|
|
||||||
@ -386,6 +388,7 @@ struct dlb2_port {
|
|||||||
uint16_t hw_credit_quanta;
|
uint16_t hw_credit_quanta;
|
||||||
bool use_avx512;
|
bool use_avx512;
|
||||||
uint32_t cq_weight;
|
uint32_t cq_weight;
|
||||||
|
bool is_producer; /* True if port is of type producer */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Per-process per-port mmio and memory pointers */
|
/* Per-process per-port mmio and memory pointers */
|
||||||
@ -669,6 +672,8 @@ struct dlb2_devargs {
|
|||||||
struct dlb2_cq_weight cq_weight;
|
struct dlb2_cq_weight cq_weight;
|
||||||
struct dlb2_port_cos port_cos;
|
struct dlb2_port_cos port_cos;
|
||||||
struct dlb2_cos_bw cos_bw;
|
struct dlb2_cos_bw cos_bw;
|
||||||
|
const char *producer_coremask;
|
||||||
|
bool default_ldb_port_allocation;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* End Eventdev related defines and structs */
|
/* End Eventdev related defines and structs */
|
||||||
@ -722,6 +727,8 @@ void dlb2_event_build_hcws(struct dlb2_port *qm_port,
|
|||||||
uint8_t *sched_type,
|
uint8_t *sched_type,
|
||||||
uint8_t *queue_id);
|
uint8_t *queue_id);
|
||||||
|
|
||||||
|
/* Extern functions */
|
||||||
|
extern int rte_eal_parse_coremask(const char *coremask, int *cores);
|
||||||
|
|
||||||
/* Extern globals */
|
/* Extern globals */
|
||||||
extern struct process_local_port_data dlb2_port[][DLB2_NUM_PORT_TYPES];
|
extern struct process_local_port_data dlb2_port[][DLB2_NUM_PORT_TYPES];
|
||||||
|
@ -498,6 +498,7 @@ struct dlb2_create_dir_port_args {
|
|||||||
__u16 cq_depth;
|
__u16 cq_depth;
|
||||||
__u16 cq_depth_threshold;
|
__u16 cq_depth_threshold;
|
||||||
__s32 queue_id;
|
__s32 queue_id;
|
||||||
|
__u8 is_producer;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -249,6 +249,7 @@ struct dlb2_hw_domain {
|
|||||||
struct dlb2_list_head avail_ldb_queues;
|
struct dlb2_list_head avail_ldb_queues;
|
||||||
struct dlb2_list_head avail_ldb_ports[DLB2_NUM_COS_DOMAINS];
|
struct dlb2_list_head avail_ldb_ports[DLB2_NUM_COS_DOMAINS];
|
||||||
struct dlb2_list_head avail_dir_pq_pairs;
|
struct dlb2_list_head avail_dir_pq_pairs;
|
||||||
|
struct dlb2_list_head rsvd_dir_pq_pairs;
|
||||||
u32 total_hist_list_entries;
|
u32 total_hist_list_entries;
|
||||||
u32 avail_hist_list_entries;
|
u32 avail_hist_list_entries;
|
||||||
u32 hist_list_entry_base;
|
u32 hist_list_entry_base;
|
||||||
@ -347,6 +348,10 @@ struct dlb2_hw {
|
|||||||
struct dlb2_function_resources vdev[DLB2_MAX_NUM_VDEVS];
|
struct dlb2_function_resources vdev[DLB2_MAX_NUM_VDEVS];
|
||||||
struct dlb2_hw_domain domains[DLB2_MAX_NUM_DOMAINS];
|
struct dlb2_hw_domain domains[DLB2_MAX_NUM_DOMAINS];
|
||||||
u8 cos_reservation[DLB2_NUM_COS_DOMAINS];
|
u8 cos_reservation[DLB2_NUM_COS_DOMAINS];
|
||||||
|
int prod_core_list[RTE_MAX_LCORE];
|
||||||
|
u8 num_prod_cores;
|
||||||
|
int dir_pp_allocations[DLB2_MAX_NUM_DIR_PORTS_V2_5];
|
||||||
|
int ldb_pp_allocations[DLB2_MAX_NUM_LDB_PORTS];
|
||||||
|
|
||||||
/* Virtualization */
|
/* Virtualization */
|
||||||
int virt_mode;
|
int virt_mode;
|
||||||
|
@ -51,6 +51,7 @@ static void dlb2_init_domain_rsrc_lists(struct dlb2_hw_domain *domain)
|
|||||||
dlb2_list_init_head(&domain->used_dir_pq_pairs);
|
dlb2_list_init_head(&domain->used_dir_pq_pairs);
|
||||||
dlb2_list_init_head(&domain->avail_ldb_queues);
|
dlb2_list_init_head(&domain->avail_ldb_queues);
|
||||||
dlb2_list_init_head(&domain->avail_dir_pq_pairs);
|
dlb2_list_init_head(&domain->avail_dir_pq_pairs);
|
||||||
|
dlb2_list_init_head(&domain->rsvd_dir_pq_pairs);
|
||||||
|
|
||||||
for (i = 0; i < DLB2_NUM_COS_DOMAINS; i++)
|
for (i = 0; i < DLB2_NUM_COS_DOMAINS; i++)
|
||||||
dlb2_list_init_head(&domain->used_ldb_ports[i]);
|
dlb2_list_init_head(&domain->used_ldb_ports[i]);
|
||||||
@ -106,8 +107,10 @@ void dlb2_resource_free(struct dlb2_hw *hw)
|
|||||||
* Return:
|
* Return:
|
||||||
* Returns 0 upon success, <0 otherwise.
|
* Returns 0 upon success, <0 otherwise.
|
||||||
*/
|
*/
|
||||||
int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
|
int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args)
|
||||||
{
|
{
|
||||||
|
const struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;
|
||||||
|
bool ldb_port_default = args ? args->default_ldb_port_allocation : false;
|
||||||
struct dlb2_list_entry *list;
|
struct dlb2_list_entry *list;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
int ret;
|
int ret;
|
||||||
@ -122,6 +125,7 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
|
|||||||
* the distance from 1 to 0 and to 2, the distance from 2 to 1 and to
|
* the distance from 1 to 0 and to 2, the distance from 2 to 1 and to
|
||||||
* 3, etc.).
|
* 3, etc.).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const u8 init_ldb_port_allocation[DLB2_MAX_NUM_LDB_PORTS] = {
|
const u8 init_ldb_port_allocation[DLB2_MAX_NUM_LDB_PORTS] = {
|
||||||
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9,
|
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9,
|
||||||
16, 23, 30, 21, 28, 19, 26, 17, 24, 31, 22, 29, 20, 27, 18, 25,
|
16, 23, 30, 21, 28, 19, 26, 17, 24, 31, 22, 29, 20, 27, 18, 25,
|
||||||
@ -164,7 +168,10 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
|
|||||||
int cos_id = i >> DLB2_NUM_COS_DOMAINS;
|
int cos_id = i >> DLB2_NUM_COS_DOMAINS;
|
||||||
struct dlb2_ldb_port *port;
|
struct dlb2_ldb_port *port;
|
||||||
|
|
||||||
port = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];
|
if (ldb_port_default == true)
|
||||||
|
port = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];
|
||||||
|
else
|
||||||
|
port = &hw->rsrcs.ldb_ports[hw->ldb_pp_allocations[i]];
|
||||||
|
|
||||||
dlb2_list_add(&hw->pf.avail_ldb_ports[cos_id],
|
dlb2_list_add(&hw->pf.avail_ldb_ports[cos_id],
|
||||||
&port->func_list);
|
&port->func_list);
|
||||||
@ -172,7 +179,8 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
|
|||||||
|
|
||||||
hw->pf.num_avail_dir_pq_pairs = DLB2_MAX_NUM_DIR_PORTS(hw->ver);
|
hw->pf.num_avail_dir_pq_pairs = DLB2_MAX_NUM_DIR_PORTS(hw->ver);
|
||||||
for (i = 0; i < hw->pf.num_avail_dir_pq_pairs; i++) {
|
for (i = 0; i < hw->pf.num_avail_dir_pq_pairs; i++) {
|
||||||
list = &hw->rsrcs.dir_pq_pairs[i].func_list;
|
int index = hw->dir_pp_allocations[i];
|
||||||
|
list = &hw->rsrcs.dir_pq_pairs[index].func_list;
|
||||||
|
|
||||||
dlb2_list_add(&hw->pf.avail_dir_pq_pairs, list);
|
dlb2_list_add(&hw->pf.avail_dir_pq_pairs, list);
|
||||||
}
|
}
|
||||||
@ -592,6 +600,7 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,
|
|||||||
u32 num_ports,
|
u32 num_ports,
|
||||||
struct dlb2_cmd_response *resp)
|
struct dlb2_cmd_response *resp)
|
||||||
{
|
{
|
||||||
|
int num_res = hw->num_prod_cores;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
if (rsrcs->num_avail_dir_pq_pairs < num_ports) {
|
if (rsrcs->num_avail_dir_pq_pairs < num_ports) {
|
||||||
@ -611,12 +620,19 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (num_res) {
|
||||||
|
dlb2_list_add(&domain->rsvd_dir_pq_pairs,
|
||||||
|
&port->domain_list);
|
||||||
|
num_res--;
|
||||||
|
} else {
|
||||||
|
dlb2_list_add(&domain->avail_dir_pq_pairs,
|
||||||
|
&port->domain_list);
|
||||||
|
}
|
||||||
|
|
||||||
dlb2_list_del(&rsrcs->avail_dir_pq_pairs, &port->func_list);
|
dlb2_list_del(&rsrcs->avail_dir_pq_pairs, &port->func_list);
|
||||||
|
|
||||||
port->domain_id = domain->id;
|
port->domain_id = domain->id;
|
||||||
port->owned = true;
|
port->owned = true;
|
||||||
|
|
||||||
dlb2_list_add(&domain->avail_dir_pq_pairs, &port->domain_list);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rsrcs->num_avail_dir_pq_pairs -= num_ports;
|
rsrcs->num_avail_dir_pq_pairs -= num_ports;
|
||||||
@ -739,6 +755,199 @@ static int dlb2_attach_ldb_queues(struct dlb2_hw *hw,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
dlb2_pp_profile(struct dlb2_hw *hw, int port, int cpu, bool is_ldb)
|
||||||
|
{
|
||||||
|
u64 cycle_start = 0ULL, cycle_end = 0ULL;
|
||||||
|
struct dlb2_hcw hcw_mem[DLB2_HCW_MEM_SIZE], *hcw;
|
||||||
|
void __iomem *pp_addr;
|
||||||
|
cpu_set_t cpuset;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
CPU_ZERO(&cpuset);
|
||||||
|
CPU_SET(cpu, &cpuset);
|
||||||
|
sched_setaffinity(0, sizeof(cpuset), &cpuset);
|
||||||
|
|
||||||
|
pp_addr = os_map_producer_port(hw, port, is_ldb);
|
||||||
|
|
||||||
|
/* Point hcw to a 64B-aligned location */
|
||||||
|
hcw = (struct dlb2_hcw *)((uintptr_t)&hcw_mem[DLB2_HCW_64B_OFF] &
|
||||||
|
~DLB2_HCW_ALIGN_MASK);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Program the first HCW for a completion and token return and
|
||||||
|
* the other HCWs as NOOPS
|
||||||
|
*/
|
||||||
|
|
||||||
|
memset(hcw, 0, (DLB2_HCW_MEM_SIZE - DLB2_HCW_64B_OFF) * sizeof(*hcw));
|
||||||
|
hcw->qe_comp = 1;
|
||||||
|
hcw->cq_token = 1;
|
||||||
|
hcw->lock_id = 1;
|
||||||
|
|
||||||
|
cycle_start = rte_get_tsc_cycles();
|
||||||
|
for (i = 0; i < DLB2_NUM_PROBE_ENQS; i++)
|
||||||
|
dlb2_movdir64b(pp_addr, hcw);
|
||||||
|
|
||||||
|
cycle_end = rte_get_tsc_cycles();
|
||||||
|
|
||||||
|
os_unmap_producer_port(hw, pp_addr);
|
||||||
|
return (int)(cycle_end - cycle_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
dlb2_pp_profile_func(void *data)
|
||||||
|
{
|
||||||
|
struct dlb2_pp_thread_data *thread_data = data;
|
||||||
|
int cycles;
|
||||||
|
|
||||||
|
cycles = dlb2_pp_profile(thread_data->hw, thread_data->pp,
|
||||||
|
thread_data->cpu, thread_data->is_ldb);
|
||||||
|
|
||||||
|
thread_data->cycles = cycles;
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int dlb2_pp_cycle_comp(const void *a, const void *b)
|
||||||
|
{
|
||||||
|
const struct dlb2_pp_thread_data *x = a;
|
||||||
|
const struct dlb2_pp_thread_data *y = b;
|
||||||
|
|
||||||
|
return x->cycles - y->cycles;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Probe producer ports from different CPU cores */
|
||||||
|
static void
|
||||||
|
dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)
|
||||||
|
{
|
||||||
|
struct dlb2_dev *dlb2_dev = container_of(hw, struct dlb2_dev, hw);
|
||||||
|
int i, err, ver = DLB2_HW_DEVICE_FROM_PCI_ID(dlb2_dev->pdev);
|
||||||
|
bool is_ldb = (port_type == DLB2_LDB_PORT);
|
||||||
|
int num_ports = is_ldb ? DLB2_MAX_NUM_LDB_PORTS :
|
||||||
|
DLB2_MAX_NUM_DIR_PORTS(ver);
|
||||||
|
struct dlb2_pp_thread_data dlb2_thread_data[num_ports];
|
||||||
|
int *port_allocations = is_ldb ? hw->ldb_pp_allocations :
|
||||||
|
hw->dir_pp_allocations;
|
||||||
|
int num_sort = is_ldb ? DLB2_NUM_COS_DOMAINS : 1;
|
||||||
|
struct dlb2_pp_thread_data cos_cycles[num_sort];
|
||||||
|
int num_ports_per_sort = num_ports / num_sort;
|
||||||
|
pthread_t pthread;
|
||||||
|
|
||||||
|
dlb2_dev->enqueue_four = dlb2_movdir64b;
|
||||||
|
|
||||||
|
DLB2_LOG_INFO(" for %s: cpu core used in pp profiling: %d\n",
|
||||||
|
is_ldb ? "LDB" : "DIR", cpu);
|
||||||
|
|
||||||
|
memset(cos_cycles, 0, num_sort * sizeof(struct dlb2_pp_thread_data));
|
||||||
|
for (i = 0; i < num_ports; i++) {
|
||||||
|
int cos = is_ldb ? (i >> DLB2_NUM_COS_DOMAINS) : 0;
|
||||||
|
|
||||||
|
dlb2_thread_data[i].is_ldb = is_ldb;
|
||||||
|
dlb2_thread_data[i].pp = i;
|
||||||
|
dlb2_thread_data[i].cycles = 0;
|
||||||
|
dlb2_thread_data[i].hw = hw;
|
||||||
|
dlb2_thread_data[i].cpu = cpu;
|
||||||
|
|
||||||
|
err = pthread_create(&pthread, NULL, &dlb2_pp_profile_func,
|
||||||
|
&dlb2_thread_data[i]);
|
||||||
|
if (err) {
|
||||||
|
DLB2_LOG_ERR(": thread creation failed! err=%d", err);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = pthread_join(pthread, NULL);
|
||||||
|
if (err) {
|
||||||
|
DLB2_LOG_ERR(": thread join failed! err=%d", err);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
cos_cycles[cos].cycles += dlb2_thread_data[i].cycles;
|
||||||
|
|
||||||
|
if ((i + 1) % num_ports_per_sort == 0) {
|
||||||
|
int index = cos * num_ports_per_sort;
|
||||||
|
|
||||||
|
cos_cycles[cos].pp = index;
|
||||||
|
/*
|
||||||
|
* For LDB ports first sort with in a cos. Later sort
|
||||||
|
* the best cos based on total cycles for the cos.
|
||||||
|
* For DIR ports, there is a single sort across all
|
||||||
|
* ports.
|
||||||
|
*/
|
||||||
|
qsort(&dlb2_thread_data[index], num_ports_per_sort,
|
||||||
|
sizeof(struct dlb2_pp_thread_data),
|
||||||
|
dlb2_pp_cycle_comp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Re-arrange best ports by cos if default cos is used.
|
||||||
|
*/
|
||||||
|
if (is_ldb && cos_id == DLB2_COS_DEFAULT)
|
||||||
|
qsort(cos_cycles, num_sort,
|
||||||
|
sizeof(struct dlb2_pp_thread_data),
|
||||||
|
dlb2_pp_cycle_comp);
|
||||||
|
|
||||||
|
for (i = 0; i < num_ports; i++) {
|
||||||
|
int start = is_ldb ? cos_cycles[i / num_ports_per_sort].pp : 0;
|
||||||
|
int index = i % num_ports_per_sort;
|
||||||
|
|
||||||
|
port_allocations[i] = dlb2_thread_data[start + index].pp;
|
||||||
|
DLB2_LOG_INFO(": pp %d cycles %d", port_allocations[i],
|
||||||
|
dlb2_thread_data[start + index].cycles);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)
|
||||||
|
{
|
||||||
|
const struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;
|
||||||
|
const char *mask = NULL;
|
||||||
|
int cpu = 0, cnt = 0, cores[RTE_MAX_LCORE];
|
||||||
|
int i, cos_id = DLB2_COS_DEFAULT;
|
||||||
|
|
||||||
|
if (args) {
|
||||||
|
mask = (const char *)args->producer_coremask;
|
||||||
|
cos_id = args->cos_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mask && rte_eal_parse_coremask(mask, cores)) {
|
||||||
|
DLB2_LOG_ERR(": Invalid producer coremask=%s", mask);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
hw->num_prod_cores = 0;
|
||||||
|
for (i = 0; i < RTE_MAX_LCORE; i++) {
|
||||||
|
if (rte_lcore_is_enabled(i)) {
|
||||||
|
if (mask) {
|
||||||
|
/*
|
||||||
|
* Populate the producer cores from parsed
|
||||||
|
* coremask
|
||||||
|
*/
|
||||||
|
if (cores[i] != -1) {
|
||||||
|
hw->prod_core_list[cores[i]] = i;
|
||||||
|
hw->num_prod_cores++;
|
||||||
|
}
|
||||||
|
} else if ((++cnt == DLB2_EAL_PROBE_CORE ||
|
||||||
|
rte_lcore_count() < DLB2_EAL_PROBE_CORE)) {
|
||||||
|
/*
|
||||||
|
* If no producer coremask is provided, use the
|
||||||
|
* second EAL core to probe
|
||||||
|
*/
|
||||||
|
cpu = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Use the first core in producer coremask to probe */
|
||||||
|
if (hw->num_prod_cores)
|
||||||
|
cpu = hw->prod_core_list[0];
|
||||||
|
|
||||||
|
dlb2_get_pp_allocation(hw, cpu, DLB2_LDB_PORT, cos_id);
|
||||||
|
dlb2_get_pp_allocation(hw, cpu, DLB2_DIR_PORT, DLB2_COS_DEFAULT);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dlb2_domain_attach_resources(struct dlb2_hw *hw,
|
dlb2_domain_attach_resources(struct dlb2_hw *hw,
|
||||||
struct dlb2_function_resources *rsrcs,
|
struct dlb2_function_resources *rsrcs,
|
||||||
@ -4359,6 +4568,8 @@ dlb2_verify_create_ldb_port_args(struct dlb2_hw *hw,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DLB2_LOG_INFO(": LDB: cos=%d port:%d\n", id, port->id.phys_id);
|
||||||
|
|
||||||
/* Check cache-line alignment */
|
/* Check cache-line alignment */
|
||||||
if ((cq_dma_base & 0x3F) != 0) {
|
if ((cq_dma_base & 0x3F) != 0) {
|
||||||
resp->status = DLB2_ST_INVALID_CQ_VIRT_ADDR;
|
resp->status = DLB2_ST_INVALID_CQ_VIRT_ADDR;
|
||||||
@ -4568,13 +4779,25 @@ dlb2_verify_create_dir_port_args(struct dlb2_hw *hw,
|
|||||||
/*
|
/*
|
||||||
* If the port's queue is not configured, validate that a free
|
* If the port's queue is not configured, validate that a free
|
||||||
* port-queue pair is available.
|
* port-queue pair is available.
|
||||||
|
* First try the 'res' list if the port is producer OR if
|
||||||
|
* 'avail' list is empty else fall back to 'avail' list
|
||||||
*/
|
*/
|
||||||
pq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,
|
if (!dlb2_list_empty(&domain->rsvd_dir_pq_pairs) &&
|
||||||
typeof(*pq));
|
(args->is_producer ||
|
||||||
|
dlb2_list_empty(&domain->avail_dir_pq_pairs)))
|
||||||
|
pq = DLB2_DOM_LIST_HEAD(domain->rsvd_dir_pq_pairs,
|
||||||
|
typeof(*pq));
|
||||||
|
else
|
||||||
|
pq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,
|
||||||
|
typeof(*pq));
|
||||||
|
|
||||||
if (!pq) {
|
if (!pq) {
|
||||||
resp->status = DLB2_ST_DIR_PORTS_UNAVAILABLE;
|
resp->status = DLB2_ST_DIR_PORTS_UNAVAILABLE;
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
DLB2_LOG_INFO(": DIR: port:%d is_producer=%d\n",
|
||||||
|
pq->id.phys_id, args->is_producer);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check cache-line alignment */
|
/* Check cache-line alignment */
|
||||||
@ -4875,11 +5098,18 @@ int dlb2_hw_create_dir_port(struct dlb2_hw *hw,
|
|||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Configuration succeeded, so move the resource from the 'avail' to
|
* Configuration succeeded, so move the resource from the 'avail' or
|
||||||
* the 'used' list (if it's not already there).
|
* 'res' to the 'used' list (if it's not already there).
|
||||||
*/
|
*/
|
||||||
if (args->queue_id == -1) {
|
if (args->queue_id == -1) {
|
||||||
dlb2_list_del(&domain->avail_dir_pq_pairs, &port->domain_list);
|
struct dlb2_list_head *res = &domain->rsvd_dir_pq_pairs;
|
||||||
|
struct dlb2_list_head *avail = &domain->avail_dir_pq_pairs;
|
||||||
|
|
||||||
|
if ((args->is_producer && !dlb2_list_empty(res)) ||
|
||||||
|
dlb2_list_empty(avail))
|
||||||
|
dlb2_list_del(res, &port->domain_list);
|
||||||
|
else
|
||||||
|
dlb2_list_del(avail, &port->domain_list);
|
||||||
|
|
||||||
dlb2_list_add(&domain->used_dir_pq_pairs, &port->domain_list);
|
dlb2_list_add(&domain->used_dir_pq_pairs, &port->domain_list);
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,20 @@
|
|||||||
* Return:
|
* Return:
|
||||||
* Returns 0 upon success, <0 otherwise.
|
* Returns 0 upon success, <0 otherwise.
|
||||||
*/
|
*/
|
||||||
int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver);
|
int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* dlb2_resource_probe() - probe hw resources
|
||||||
|
* @hw: pointer to struct dlb2_hw.
* @probe_args: optional pointer to the parsed struct dlb2_devargs (may be NULL).
|
||||||
|
*
|
||||||
|
* This function probes hw resources for best port allocation to producer
|
||||||
|
* cores.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* Returns 0 upon success, <0 otherwise.
|
||||||
|
*/
|
||||||
|
int dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* dlb2_clr_pmcsr_disable() - power on bulk of DLB 2.0 logic
|
* dlb2_clr_pmcsr_disable() - power on bulk of DLB 2.0 logic
|
||||||
|
@ -147,7 +147,7 @@ static int dlb2_pf_wait_for_device_ready(struct dlb2_dev *dlb2_dev,
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct dlb2_dev *
|
struct dlb2_dev *
|
||||||
dlb2_probe(struct rte_pci_device *pdev)
|
dlb2_probe(struct rte_pci_device *pdev, const void *probe_args)
|
||||||
{
|
{
|
||||||
struct dlb2_dev *dlb2_dev;
|
struct dlb2_dev *dlb2_dev;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
@ -208,6 +208,10 @@ dlb2_probe(struct rte_pci_device *pdev)
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto wait_for_device_ready_fail;
|
goto wait_for_device_ready_fail;
|
||||||
|
|
||||||
|
ret = dlb2_resource_probe(&dlb2_dev->hw, probe_args);
|
||||||
|
if (ret)
|
||||||
|
goto resource_probe_fail;
|
||||||
|
|
||||||
ret = dlb2_pf_reset(dlb2_dev);
|
ret = dlb2_pf_reset(dlb2_dev);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto dlb2_reset_fail;
|
goto dlb2_reset_fail;
|
||||||
@ -216,7 +220,7 @@ dlb2_probe(struct rte_pci_device *pdev)
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto init_driver_state_fail;
|
goto init_driver_state_fail;
|
||||||
|
|
||||||
ret = dlb2_resource_init(&dlb2_dev->hw, dlb_version);
|
ret = dlb2_resource_init(&dlb2_dev->hw, dlb_version, probe_args);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto resource_init_fail;
|
goto resource_init_fail;
|
||||||
|
|
||||||
@ -227,6 +231,7 @@ dlb2_probe(struct rte_pci_device *pdev)
|
|||||||
init_driver_state_fail:
|
init_driver_state_fail:
|
||||||
dlb2_reset_fail:
|
dlb2_reset_fail:
|
||||||
pci_mmap_bad_addr:
|
pci_mmap_bad_addr:
|
||||||
|
resource_probe_fail:
|
||||||
wait_for_device_ready_fail:
|
wait_for_device_ready_fail:
|
||||||
rte_free(dlb2_dev);
|
rte_free(dlb2_dev);
|
||||||
dlb2_dev_malloc_fail:
|
dlb2_dev_malloc_fail:
|
||||||
|
@ -15,7 +15,11 @@
|
|||||||
#include "base/dlb2_hw_types.h"
|
#include "base/dlb2_hw_types.h"
|
||||||
#include "../dlb2_user.h"
|
#include "../dlb2_user.h"
|
||||||
|
|
||||||
#define DLB2_DEFAULT_UNREGISTER_TIMEOUT_S 5
|
#define DLB2_EAL_PROBE_CORE 2
|
||||||
|
#define DLB2_NUM_PROBE_ENQS 1000
|
||||||
|
#define DLB2_HCW_MEM_SIZE 8
|
||||||
|
#define DLB2_HCW_64B_OFF 4
|
||||||
|
#define DLB2_HCW_ALIGN_MASK 0x3F
|
||||||
|
|
||||||
struct dlb2_dev;
|
struct dlb2_dev;
|
||||||
|
|
||||||
@ -31,15 +35,30 @@ struct dlb2_dev {
|
|||||||
/* struct list_head list; */
|
/* struct list_head list; */
|
||||||
struct device *dlb2_device;
|
struct device *dlb2_device;
|
||||||
bool domain_reset_failed;
|
bool domain_reset_failed;
|
||||||
|
/* The enqueue_four function enqueues four HCWs (one cache-line worth)
|
||||||
|
* to the HQM, using whichever mechanism is supported by the platform
|
||||||
|
* on which this driver is running.
|
||||||
|
*/
|
||||||
|
void (*enqueue_four)(void *qe4, void *pp_addr);
|
||||||
/* The resource mutex serializes access to driver data structures and
|
/* The resource mutex serializes access to driver data structures and
|
||||||
* hardware registers.
|
* hardware registers.
|
||||||
*/
|
*/
|
||||||
rte_spinlock_t resource_mutex;
|
rte_spinlock_t resource_mutex;
|
||||||
bool worker_launched;
|
bool worker_launched;
|
||||||
u8 revision;
|
u8 revision;
|
||||||
|
u8 version;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev);
|
/* Per-port work item handed to a producer-port profiling thread. */
struct dlb2_pp_thread_data {
	struct dlb2_hw *hw;	/* device the port belongs to */
	int pp, cpu;		/* producer port ID; CPU to probe from */
	bool is_ldb;		/* true for an LDB port, false for a DIR port */
	int cycles;		/* measured cycle count, filled in by the probe */
};
|
||||||
|
|
||||||
|
struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev, const void *probe_args);
|
||||||
|
|
||||||
|
|
||||||
int dlb2_pf_reset(struct dlb2_dev *dlb2_dev);
|
int dlb2_pf_reset(struct dlb2_dev *dlb2_dev);
|
||||||
int dlb2_pf_create_sched_domain(struct dlb2_hw *hw,
|
int dlb2_pf_create_sched_domain(struct dlb2_hw *hw,
|
||||||
|
@ -702,6 +702,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
|
|||||||
struct dlb2_devargs dlb2_args = {
|
struct dlb2_devargs dlb2_args = {
|
||||||
.socket_id = rte_socket_id(),
|
.socket_id = rte_socket_id(),
|
||||||
.max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
|
.max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
|
||||||
|
.producer_coremask = NULL,
|
||||||
.num_dir_credits_override = -1,
|
.num_dir_credits_override = -1,
|
||||||
.qid_depth_thresholds = { {0} },
|
.qid_depth_thresholds = { {0} },
|
||||||
.poll_interval = DLB2_POLL_INTERVAL_DEFAULT,
|
.poll_interval = DLB2_POLL_INTERVAL_DEFAULT,
|
||||||
@ -713,6 +714,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
|
|||||||
};
|
};
|
||||||
struct dlb2_eventdev *dlb2;
|
struct dlb2_eventdev *dlb2;
|
||||||
int q;
|
int q;
|
||||||
|
const void *probe_args = NULL;
|
||||||
|
|
||||||
DLB2_LOG_DBG("Enter with dev_id=%d socket_id=%d",
|
DLB2_LOG_DBG("Enter with dev_id=%d socket_id=%d",
|
||||||
eventdev->data->dev_id, eventdev->data->socket_id);
|
eventdev->data->dev_id, eventdev->data->socket_id);
|
||||||
@ -728,16 +730,6 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
|
|||||||
dlb2 = dlb2_pmd_priv(eventdev); /* rte_zmalloc_socket mem */
|
dlb2 = dlb2_pmd_priv(eventdev); /* rte_zmalloc_socket mem */
|
||||||
dlb2->version = DLB2_HW_DEVICE_FROM_PCI_ID(pci_dev);
|
dlb2->version = DLB2_HW_DEVICE_FROM_PCI_ID(pci_dev);
|
||||||
|
|
||||||
/* Probe the DLB2 PF layer */
|
|
||||||
dlb2->qm_instance.pf_dev = dlb2_probe(pci_dev);
|
|
||||||
|
|
||||||
if (dlb2->qm_instance.pf_dev == NULL) {
|
|
||||||
DLB2_LOG_ERR("DLB2 PF Probe failed with error %d\n",
|
|
||||||
rte_errno);
|
|
||||||
ret = -rte_errno;
|
|
||||||
goto dlb2_probe_failed;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Were we invoked with runtime parameters? */
|
/* Were we invoked with runtime parameters? */
|
||||||
if (pci_dev->device.devargs) {
|
if (pci_dev->device.devargs) {
|
||||||
ret = dlb2_parse_params(pci_dev->device.devargs->args,
|
ret = dlb2_parse_params(pci_dev->device.devargs->args,
|
||||||
@ -749,6 +741,17 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
|
|||||||
ret, rte_errno);
|
ret, rte_errno);
|
||||||
goto dlb2_probe_failed;
|
goto dlb2_probe_failed;
|
||||||
}
|
}
|
||||||
|
probe_args = &dlb2_args;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Probe the DLB2 PF layer */
|
||||||
|
dlb2->qm_instance.pf_dev = dlb2_probe(pci_dev, probe_args);
|
||||||
|
|
||||||
|
if (dlb2->qm_instance.pf_dev == NULL) {
|
||||||
|
DLB2_LOG_ERR("DLB2 PF Probe failed with error %d\n",
|
||||||
|
rte_errno);
|
||||||
|
ret = -rte_errno;
|
||||||
|
goto dlb2_probe_failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = dlb2_primary_eventdev_probe(eventdev,
|
ret = dlb2_primary_eventdev_probe(eventdev,
|
||||||
|
Loading…
Reference in New Issue
Block a user