diff --git a/doc/guides/eventdevs/dlb2.rst b/doc/guides/eventdevs/dlb2.rst
index 5b21f13b68..f5bf5757c6 100644
--- a/doc/guides/eventdevs/dlb2.rst
+++ b/doc/guides/eventdevs/dlb2.rst
@@ -414,3 +414,39 @@ Note that the weight may not exceed the maximum CQ depth.
     --allow ea:00.0,cq_weight=all:
     --allow ea:00.0,cq_weight=qidA-qidB:
     --allow ea:00.0,cq_weight=qid:
+
+Producer Coremask
+~~~~~~~~~~~~~~~~~
+
+For best performance, applications running on certain cores should use
+the DLB device locally available on the same tile, along with its other
+resources. To allocate optimal resources, each producer port (PP) is
+probed from a given CPU and the best-performing ports are allocated to
+producers. The CPU used for probing is either the first core of the
+producer coremask (if present) or the second core of the EAL coremask.
+This will be extended later to probe all CPUs in the producer coremask
+or EAL coremask. The producer coremask can be passed along with the BDF
+of the DLB device.
+
+.. code-block:: console
+
+    -a xx:y.z,producer_coremask=
+
+Default LDB Port Allocation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For optimal load balancing, ports that map to one or more QIDs in common
+should not be in numerical sequence. The port->QID mapping is application
+dependent, but the driver interleaves port IDs as much as possible to
+reduce the likelihood of sequential ports mapping to the same QID(s).
+
+Hence, DLB uses an initial allocation of port IDs that maximizes the
+average distance between an ID and its immediate neighbors (i.e., the
+distance from 1 to 0 and to 2, the distance from 2 to 1 and to 3, etc.).
+The initial port allocation option can be passed through a devarg. If y
+(or Y) is passed, this initial port allocation is used; otherwise the
+probed port allocation is used.
+
+.. code-block:: console
+
+    --allow ea:00.0,default_port_allocation=
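Editor's note: the RTE_EVENT_PORT_CFG_HINT_PRODUCER flag checked in the dlb2.c hunk below is supplied by the application when it sets up an event port. A minimal sketch of that application-side step (not part of this patch; setup_producer_port(), dev_id and port_id are placeholder names) adds the hint on top of the default port configuration:

    #include <rte_eventdev.h>

    /* Configure an event port as producer-only so the PMD can back it
     * with one of the best-probed producer ports. */
    static int
    setup_producer_port(uint8_t dev_id, uint8_t port_id)
    {
        struct rte_event_port_conf conf;
        int ret;

        ret = rte_event_port_default_conf_get(dev_id, port_id, &conf);
        if (ret < 0)
            return ret;

        /* Hint that this port is only used to enqueue new events. */
        conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_PRODUCER;

        return rte_event_port_setup(dev_id, port_id, &conf);
    }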
diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index 759578378f..6a9db4b642 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -293,6 +293,23 @@ dlb2_string_to_int(int *result, const char *str)
     return 0;
 }
 
+static int
+set_producer_coremask(const char *key __rte_unused,
+              const char *value,
+              void *opaque)
+{
+    const char **mask_str = opaque;
+
+    if (value == NULL || opaque == NULL) {
+        DLB2_LOG_ERR("NULL pointer\n");
+        return -EINVAL;
+    }
+
+    *mask_str = value;
+
+    return 0;
+}
+
 static int
 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
 {
@@ -617,6 +634,26 @@ set_vector_opts_enab(const char *key __rte_unused,
     return 0;
 }
 
+static int
+set_default_ldb_port_allocation(const char *key __rte_unused,
+                const char *value,
+                void *opaque)
+{
+    bool *default_ldb_port_allocation = opaque;
+
+    if (value == NULL || opaque == NULL) {
+        DLB2_LOG_ERR("NULL pointer\n");
+        return -EINVAL;
+    }
+
+    if ((*value == 'y') || (*value == 'Y'))
+        *default_ldb_port_allocation = true;
+    else
+        *default_ldb_port_allocation = false;
+
+    return 0;
+}
+
 static int
 set_qid_depth_thresh(const char *key __rte_unused,
              const char *value,
@@ -1785,6 +1822,9 @@ dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
     } else
         credit_high_watermark = enqueue_depth;
 
+    if (ev_port->conf.event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER)
+        cfg.is_producer = 1;
+
     /* Per QM values */
 
     ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
@@ -1979,6 +2019,10 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,
     }
 
     ev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;
+    /* Save off port config for reconfig */
+    ev_port->conf = *port_conf;
+
+
     /*
      * Create port
      */
@@ -2005,9 +2049,6 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,
         }
     }
 
-    /* Save off port config for reconfig */
-    ev_port->conf = *port_conf;
-
     ev_port->id = ev_port_id;
     ev_port->enq_configured = true;
     ev_port->setup_done = true;
@@ -4700,6 +4741,8 @@ dlb2_parse_params(const char *params,
                          DLB2_CQ_WEIGHT,
                          DLB2_PORT_COS,
                          DLB2_COS_BW,
+                         DLB2_PRODUCER_COREMASK,
+                         DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,
                          NULL };
 
     if (params != NULL && params[0] != '\0') {
@@ -4881,6 +4924,29 @@ dlb2_parse_params(const char *params,
 
         }
 
+        ret = rte_kvargs_process(kvlist,
+                     DLB2_PRODUCER_COREMASK,
+                     set_producer_coremask,
+                     &dlb2_args->producer_coremask);
+        if (ret != 0) {
+            DLB2_LOG_ERR(
+                "%s: Error parsing producer coremask",
+                name);
+            rte_kvargs_free(kvlist);
+            return ret;
+        }
+
+        ret = rte_kvargs_process(kvlist,
+                     DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,
+                     set_default_ldb_port_allocation,
+                     &dlb2_args->default_ldb_port_allocation);
+        if (ret != 0) {
+            DLB2_LOG_ERR("%s: Error parsing ldb default port allocation arg",
+                     name);
+            rte_kvargs_free(kvlist);
+            return ret;
+        }
+
         rte_kvargs_free(kvlist);
     }
 }
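Editor's note: the hunks above plug two new handlers into the driver's existing rte_kvargs processing. For readers unfamiliar with that flow, here is a self-contained sketch of the parse/process/free sequence used by dlb2_parse_params(); the key name "example_key" and the helpers parse_example()/parse_devargs_sketch() are hypothetical, while rte_kvargs_parse(), rte_kvargs_process() and rte_kvargs_free() are the real DPDK APIs:

    #include <errno.h>
    #include <rte_common.h>
    #include <rte_kvargs.h>

    /* Hypothetical handler: keep the raw value string, as
     * set_producer_coremask() does above. */
    static int
    parse_example(const char *key __rte_unused, const char *value, void *opaque)
    {
        const char **out = opaque;

        if (value == NULL || opaque == NULL)
            return -EINVAL;

        *out = value;
        return 0;
    }

    static int
    parse_devargs_sketch(const char *params, const char **example_val)
    {
        static const char * const valid[] = { "example_key", NULL };
        struct rte_kvargs *kvlist;
        int ret;

        /* Split "key=value,key=value" pairs and validate the keys. */
        kvlist = rte_kvargs_parse(params, valid);
        if (kvlist == NULL)
            return -EINVAL;

        /* Invoke the handler once for every occurrence of the key. */
        ret = rte_kvargs_process(kvlist, "example_key", parse_example,
                                 example_val);

        rte_kvargs_free(kvlist);
        return ret;
    }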
diff --git a/drivers/event/dlb2/dlb2_priv.h b/drivers/event/dlb2/dlb2_priv.h
index db431f7d8b..9ef5bcb901 100644
--- a/drivers/event/dlb2/dlb2_priv.h
+++ b/drivers/event/dlb2/dlb2_priv.h
@@ -51,6 +51,8 @@
 #define DLB2_CQ_WEIGHT "cq_weight"
 #define DLB2_PORT_COS "port_cos"
 #define DLB2_COS_BW "cos_bw"
+#define DLB2_PRODUCER_COREMASK "producer_coremask"
+#define DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG "default_port_allocation"
 
 /* Begin HW related defines and structs */
 
@@ -386,6 +388,7 @@ struct dlb2_port {
     uint16_t hw_credit_quanta;
     bool use_avx512;
     uint32_t cq_weight;
+    bool is_producer;  /* True if port is of type producer */
 };
 
 /* Per-process per-port mmio and memory pointers */
@@ -669,6 +672,8 @@ struct dlb2_devargs {
     struct dlb2_cq_weight cq_weight;
     struct dlb2_port_cos port_cos;
     struct dlb2_cos_bw cos_bw;
+    const char *producer_coremask;
+    bool default_ldb_port_allocation;
 };
 
 /* End Eventdev related defines and structs */
@@ -722,6 +727,8 @@ void dlb2_event_build_hcws(struct dlb2_port *qm_port,
                uint8_t *sched_type,
                uint8_t *queue_id);
 
+/* Extern functions */
+extern int rte_eal_parse_coremask(const char *coremask, int *cores);
 
 /* Extern globals */
 extern struct process_local_port_data dlb2_port[][DLB2_NUM_PORT_TYPES];
diff --git a/drivers/event/dlb2/dlb2_user.h b/drivers/event/dlb2/dlb2_user.h
index 901e2e0c66..28c6aaaf43 100644
--- a/drivers/event/dlb2/dlb2_user.h
+++ b/drivers/event/dlb2/dlb2_user.h
@@ -498,6 +498,7 @@ struct dlb2_create_dir_port_args {
     __u16 cq_depth;
     __u16 cq_depth_threshold;
     __s32 queue_id;
+    __u8 is_producer;
 };
 
 /*
diff --git a/drivers/event/dlb2/pf/base/dlb2_hw_types.h b/drivers/event/dlb2/pf/base/dlb2_hw_types.h
index 9511521e67..87996ef621 100644
--- a/drivers/event/dlb2/pf/base/dlb2_hw_types.h
+++ b/drivers/event/dlb2/pf/base/dlb2_hw_types.h
@@ -249,6 +249,7 @@ struct dlb2_hw_domain {
     struct dlb2_list_head avail_ldb_queues;
     struct dlb2_list_head avail_ldb_ports[DLB2_NUM_COS_DOMAINS];
     struct dlb2_list_head avail_dir_pq_pairs;
+    struct dlb2_list_head rsvd_dir_pq_pairs;
     u32 total_hist_list_entries;
     u32 avail_hist_list_entries;
     u32 hist_list_entry_base;
@@ -347,6 +348,10 @@ struct dlb2_hw {
     struct dlb2_function_resources vdev[DLB2_MAX_NUM_VDEVS];
     struct dlb2_hw_domain domains[DLB2_MAX_NUM_DOMAINS];
     u8 cos_reservation[DLB2_NUM_COS_DOMAINS];
+    int prod_core_list[RTE_MAX_LCORE];
+    u8 num_prod_cores;
+    int dir_pp_allocations[DLB2_MAX_NUM_DIR_PORTS_V2_5];
+    int ldb_pp_allocations[DLB2_MAX_NUM_LDB_PORTS];
 
     /* Virtualization */
     int virt_mode;
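Editor's note: dlb2_resource_probe() in the dlb2_resource.c changes below fills the prod_core_list[] added above by indexing the cores[] array returned from rte_eal_parse_coremask() as cores[lcore_id] == position-in-mask (or -1 when the lcore is not in the mask). That contract is inferred from how the probe code uses the array; as an illustration only, a simplified stand-in parser for a plain hexadecimal mask of up to 64 cores might look like this (parse_coremask_sketch() is hypothetical and is not the EAL implementation):

    #include <stdlib.h>

    #define SKETCH_MAX_CORES 64

    /* Fill cores[i] with the zero-based position of lcore i in the mask,
     * or -1 if bit i is not set.  Returns 0 on success, -1 on a bad mask. */
    static int
    parse_coremask_sketch(const char *mask_str, int cores[SKETCH_MAX_CORES])
    {
        unsigned long long mask;
        char *end = NULL;
        int i, idx = 0;

        for (i = 0; i < SKETCH_MAX_CORES; i++)
            cores[i] = -1;

        /* Accepts an optional "0x" prefix, e.g. "0xf0". */
        mask = strtoull(mask_str, &end, 16);
        if (end == mask_str || *end != '\0' || mask == 0)
            return -1;

        for (i = 0; i < SKETCH_MAX_CORES; i++)
            if (mask & (1ULL << i))
                cores[i] = idx++;

        return 0;
    }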
diff --git a/drivers/event/dlb2/pf/base/dlb2_resource.c b/drivers/event/dlb2/pf/base/dlb2_resource.c
index 0731416a43..280a8e51b1 100644
--- a/drivers/event/dlb2/pf/base/dlb2_resource.c
+++ b/drivers/event/dlb2/pf/base/dlb2_resource.c
@@ -51,6 +51,7 @@ static void dlb2_init_domain_rsrc_lists(struct dlb2_hw_domain *domain)
     dlb2_list_init_head(&domain->used_dir_pq_pairs);
     dlb2_list_init_head(&domain->avail_ldb_queues);
     dlb2_list_init_head(&domain->avail_dir_pq_pairs);
+    dlb2_list_init_head(&domain->rsvd_dir_pq_pairs);
 
     for (i = 0; i < DLB2_NUM_COS_DOMAINS; i++)
         dlb2_list_init_head(&domain->used_ldb_ports[i]);
@@ -106,8 +107,10 @@ void dlb2_resource_free(struct dlb2_hw *hw)
  * Return:
  * Returns 0 upon success, <0 otherwise.
  */
-int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
+int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args)
 {
+    const struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;
+    bool ldb_port_default = args ? args->default_ldb_port_allocation : false;
     struct dlb2_list_entry *list;
     unsigned int i;
     int ret;
@@ -122,6 +125,7 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
      * the distance from 1 to 0 and to 2, the distance from 2 to 1 and to
      * 3, etc.).
      */
+
     const u8 init_ldb_port_allocation[DLB2_MAX_NUM_LDB_PORTS] = {
         0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9,
         16, 23, 30, 21, 28, 19, 26, 17, 24, 31, 22, 29, 20, 27, 18, 25,
@@ -164,7 +168,10 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
         int cos_id = i >> DLB2_NUM_COS_DOMAINS;
         struct dlb2_ldb_port *port;
 
-        port = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];
+        if (ldb_port_default == true)
+            port = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];
+        else
+            port = &hw->rsrcs.ldb_ports[hw->ldb_pp_allocations[i]];
 
         dlb2_list_add(&hw->pf.avail_ldb_ports[cos_id],
                   &port->func_list);
@@ -172,7 +179,8 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)
     hw->pf.num_avail_dir_pq_pairs = DLB2_MAX_NUM_DIR_PORTS(hw->ver);
 
     for (i = 0; i < hw->pf.num_avail_dir_pq_pairs; i++) {
-        list = &hw->rsrcs.dir_pq_pairs[i].func_list;
+        int index = hw->dir_pp_allocations[i];
+        list = &hw->rsrcs.dir_pq_pairs[index].func_list;
 
         dlb2_list_add(&hw->pf.avail_dir_pq_pairs, list);
     }
@@ -592,6 +600,7 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,
                  u32 num_ports,
                  struct dlb2_cmd_response *resp)
 {
+    int num_res = hw->num_prod_cores;
     unsigned int i;
 
     if (rsrcs->num_avail_dir_pq_pairs < num_ports) {
@@ -611,12 +620,19 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,
             return -EFAULT;
         }
 
+        if (num_res) {
+            dlb2_list_add(&domain->rsvd_dir_pq_pairs,
+                      &port->domain_list);
+            num_res--;
+        } else {
+            dlb2_list_add(&domain->avail_dir_pq_pairs,
+                      &port->domain_list);
+        }
+
         dlb2_list_del(&rsrcs->avail_dir_pq_pairs, &port->func_list);
 
         port->domain_id = domain->id;
         port->owned = true;
-
-        dlb2_list_add(&domain->avail_dir_pq_pairs, &port->domain_list);
     }
 
     rsrcs->num_avail_dir_pq_pairs -= num_ports;
@@ -739,6 +755,199 @@ static int dlb2_attach_ldb_queues(struct dlb2_hw *hw,
     return 0;
 }
 
+static int
+dlb2_pp_profile(struct dlb2_hw *hw, int port, int cpu, bool is_ldb)
+{
+    u64 cycle_start = 0ULL, cycle_end = 0ULL;
+    struct dlb2_hcw hcw_mem[DLB2_HCW_MEM_SIZE], *hcw;
+    void __iomem *pp_addr;
+    cpu_set_t cpuset;
+    int i;
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(cpu, &cpuset);
+    sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+    pp_addr = os_map_producer_port(hw, port, is_ldb);
+
+    /* Point hcw to a 64B-aligned location */
+    hcw = (struct dlb2_hcw *)((uintptr_t)&hcw_mem[DLB2_HCW_64B_OFF] &
+                  ~DLB2_HCW_ALIGN_MASK);
+
+    /*
+     * Program the first HCW for a completion and token return and
+     * the other HCWs as NOOPS
+     */
+
+    memset(hcw, 0, (DLB2_HCW_MEM_SIZE - DLB2_HCW_64B_OFF) * sizeof(*hcw));
+    hcw->qe_comp = 1;
+    hcw->cq_token = 1;
+    hcw->lock_id = 1;
+
+    cycle_start = rte_get_tsc_cycles();
+    for (i = 0; i < DLB2_NUM_PROBE_ENQS; i++)
+        dlb2_movdir64b(pp_addr, hcw);
+
+    cycle_end = rte_get_tsc_cycles();
+
+    os_unmap_producer_port(hw, pp_addr);
+    return (int)(cycle_end - cycle_start);
+}
+
+static void *
+dlb2_pp_profile_func(void *data)
+{
+    struct dlb2_pp_thread_data *thread_data = data;
+    int cycles;
+
+    cycles = dlb2_pp_profile(thread_data->hw, thread_data->pp,
+                 thread_data->cpu, thread_data->is_ldb);
+
+    thread_data->cycles = cycles;
+
+    return NULL;
+}
+
+static int dlb2_pp_cycle_comp(const void *a, const void *b)
+{
+    const struct dlb2_pp_thread_data *x = a;
+    const struct dlb2_pp_thread_data *y = b;
+
+    return x->cycles - y->cycles;
+}
+
+
+/* Probe producer ports from different CPU cores */
+static void
+dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)
+{
+    struct dlb2_dev *dlb2_dev = container_of(hw, struct dlb2_dev, hw);
+    int i, err, ver = DLB2_HW_DEVICE_FROM_PCI_ID(dlb2_dev->pdev);
+    bool is_ldb = (port_type == DLB2_LDB_PORT);
+    int num_ports = is_ldb ? DLB2_MAX_NUM_LDB_PORTS :
+                 DLB2_MAX_NUM_DIR_PORTS(ver);
+    struct dlb2_pp_thread_data dlb2_thread_data[num_ports];
+    int *port_allocations = is_ldb ? hw->ldb_pp_allocations :
+                     hw->dir_pp_allocations;
+    int num_sort = is_ldb ? DLB2_NUM_COS_DOMAINS : 1;
+    struct dlb2_pp_thread_data cos_cycles[num_sort];
+    int num_ports_per_sort = num_ports / num_sort;
+    pthread_t pthread;
+
+    dlb2_dev->enqueue_four = dlb2_movdir64b;
+
+    DLB2_LOG_INFO(" for %s: cpu core used in pp profiling: %d\n",
+              is_ldb ? "LDB" : "DIR", cpu);
+
+    memset(cos_cycles, 0, num_sort * sizeof(struct dlb2_pp_thread_data));
+    for (i = 0; i < num_ports; i++) {
+        int cos = is_ldb ? (i >> DLB2_NUM_COS_DOMAINS) : 0;
+
+        dlb2_thread_data[i].is_ldb = is_ldb;
+        dlb2_thread_data[i].pp = i;
+        dlb2_thread_data[i].cycles = 0;
+        dlb2_thread_data[i].hw = hw;
+        dlb2_thread_data[i].cpu = cpu;
+
+        err = pthread_create(&pthread, NULL, &dlb2_pp_profile_func,
+                     &dlb2_thread_data[i]);
+        if (err) {
+            DLB2_LOG_ERR(": thread creation failed! err=%d", err);
+            return;
+        }
+
+        err = pthread_join(pthread, NULL);
+        if (err) {
+            DLB2_LOG_ERR(": thread join failed! err=%d", err);
+            return;
+        }
+        cos_cycles[cos].cycles += dlb2_thread_data[i].cycles;
+
+        if ((i + 1) % num_ports_per_sort == 0) {
+            int index = cos * num_ports_per_sort;
+
+            cos_cycles[cos].pp = index;
+            /*
+             * For LDB ports first sort within a cos. Later sort
+             * the best cos based on total cycles for the cos.
+             * For DIR ports, there is a single sort across all
+             * ports.
+             */
+            qsort(&dlb2_thread_data[index], num_ports_per_sort,
+                  sizeof(struct dlb2_pp_thread_data),
+                  dlb2_pp_cycle_comp);
+        }
+    }
+
+    /*
+     * Re-arrange best ports by cos if default cos is used.
+     */
+    if (is_ldb && cos_id == DLB2_COS_DEFAULT)
+        qsort(cos_cycles, num_sort,
+              sizeof(struct dlb2_pp_thread_data),
+              dlb2_pp_cycle_comp);
+
+    for (i = 0; i < num_ports; i++) {
+        int start = is_ldb ? cos_cycles[i / num_ports_per_sort].pp : 0;
+        int index = i % num_ports_per_sort;
+
+        port_allocations[i] = dlb2_thread_data[start + index].pp;
+        DLB2_LOG_INFO(": pp %d cycles %d", port_allocations[i],
+                  dlb2_thread_data[start + index].cycles);
+    }
+}
+
+int
+dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)
+{
+    const struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;
+    const char *mask = NULL;
+    int cpu = 0, cnt = 0, cores[RTE_MAX_LCORE];
+    int i, cos_id = DLB2_COS_DEFAULT;
+
+    if (args) {
+        mask = (const char *)args->producer_coremask;
+        cos_id = args->cos_id;
+    }
+
+    if (mask && rte_eal_parse_coremask(mask, cores)) {
+        DLB2_LOG_ERR(": Invalid producer coremask=%s", mask);
+        return -1;
+    }
+
+    hw->num_prod_cores = 0;
+    for (i = 0; i < RTE_MAX_LCORE; i++) {
+        if (rte_lcore_is_enabled(i)) {
+            if (mask) {
+                /*
+                 * Populate the producer cores from parsed
+                 * coremask
+                 */
+                if (cores[i] != -1) {
+                    hw->prod_core_list[cores[i]] = i;
+                    hw->num_prod_cores++;
+                }
+            } else if ((++cnt == DLB2_EAL_PROBE_CORE ||
+                    rte_lcore_count() < DLB2_EAL_PROBE_CORE)) {
+                /*
+                 * If no producer coremask is provided, use the
+                 * second EAL core to probe
+                 */
+                cpu = i;
+                break;
+            }
+        }
+    }
+    /* Use the first core in producer coremask to probe */
+    if (hw->num_prod_cores)
+        cpu = hw->prod_core_list[0];
+
+    dlb2_get_pp_allocation(hw, cpu, DLB2_LDB_PORT, cos_id);
+    dlb2_get_pp_allocation(hw, cpu, DLB2_DIR_PORT, DLB2_COS_DEFAULT);
+
+    return 0;
+}
+
 static int
 dlb2_domain_attach_resources(struct dlb2_hw *hw,
                  struct dlb2_function_resources *rsrcs,
@@ -4359,6 +4568,8 @@ dlb2_verify_create_ldb_port_args(struct dlb2_hw *hw,
         return -EINVAL;
     }
 
+    DLB2_LOG_INFO(": LDB: cos=%d port:%d\n", id, port->id.phys_id);
+
     /* Check cache-line alignment */
     if ((cq_dma_base & 0x3F) != 0) {
         resp->status = DLB2_ST_INVALID_CQ_VIRT_ADDR;
@@ -4568,13 +4779,25 @@ dlb2_verify_create_dir_port_args(struct dlb2_hw *hw,
         /*
          * If the port's queue is not configured, validate that a free
          * port-queue pair is available.
+         * First try the 'res' list if the port is producer OR if
+         * 'avail' list is empty else fall back to 'avail' list
         */
-        pq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,
-                    typeof(*pq));
+        if (!dlb2_list_empty(&domain->rsvd_dir_pq_pairs) &&
+            (args->is_producer ||
+             dlb2_list_empty(&domain->avail_dir_pq_pairs)))
+            pq = DLB2_DOM_LIST_HEAD(domain->rsvd_dir_pq_pairs,
+                        typeof(*pq));
+        else
+            pq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,
+                        typeof(*pq));
+
         if (!pq) {
             resp->status = DLB2_ST_DIR_PORTS_UNAVAILABLE;
             return -EINVAL;
         }
+        DLB2_LOG_INFO(": DIR: port:%d is_producer=%d\n",
+                  pq->id.phys_id, args->is_producer);
+
     }
 
     /* Check cache-line alignment */
@@ -4875,11 +5098,18 @@ int dlb2_hw_create_dir_port(struct dlb2_hw *hw,
         return ret;
 
     /*
-     * Configuration succeeded, so move the resource from the 'avail' to
-     * the 'used' list (if it's not already there).
+     * Configuration succeeded, so move the resource from the 'avail' or
+     * 'res' to the 'used' list (if it's not already there).
     */
     if (args->queue_id == -1) {
-        dlb2_list_del(&domain->avail_dir_pq_pairs, &port->domain_list);
+        struct dlb2_list_head *res = &domain->rsvd_dir_pq_pairs;
+        struct dlb2_list_head *avail = &domain->avail_dir_pq_pairs;
+
+        if ((args->is_producer && !dlb2_list_empty(res)) ||
+            dlb2_list_empty(avail))
+            dlb2_list_del(res, &port->domain_list);
+        else
+            dlb2_list_del(avail, &port->domain_list);
 
         dlb2_list_add(&domain->used_dir_pq_pairs, &port->domain_list);
     }
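Editor's note: the init_ldb_port_allocation[] table kept above for the default_port_allocation=y path is claimed to maximize the average distance between numerically adjacent port IDs. As an illustration only (not part of the patch), a small stand-alone program can check that property over the first 16 entries of the table:

    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
        /* First 16 entries of init_ldb_port_allocation[] above. */
        static const int tbl[16] = {
            0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9,
        };
        int pos[16], i, sum = 0;

        /* pos[id] = index at which port ID 'id' is handed out. */
        for (i = 0; i < 16; i++)
            pos[tbl[i]] = i;

        /* Distance between the allocation slots of IDs k-1 and k. */
        for (i = 1; i < 16; i++)
            sum += abs(pos[i] - pos[i - 1]);

        printf("average distance between adjacent IDs: %.2f\n",
               (double)sum / 15);
        return 0;
    }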
diff --git a/drivers/event/dlb2/pf/base/dlb2_resource.h b/drivers/event/dlb2/pf/base/dlb2_resource.h
index a7e6c90888..71bd6148f1 100644
--- a/drivers/event/dlb2/pf/base/dlb2_resource.h
+++ b/drivers/event/dlb2/pf/base/dlb2_resource.h
@@ -23,7 +23,20 @@
  * Return:
  * Returns 0 upon success, <0 otherwise.
  */
-int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver);
+int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args);
+
+/**
+ * dlb2_resource_probe() - probe hw resources
+ * @hw: pointer to struct dlb2_hw.
+ *
+ * This function probes hw resources for best port allocation to producer
+ * cores.
+ *
+ * Return:
+ * Returns 0 upon success, <0 otherwise.
+ */
+int dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args);
+
 /**
  * dlb2_clr_pmcsr_disable() - power on bulk of DLB 2.0 logic
diff --git a/drivers/event/dlb2/pf/dlb2_main.c b/drivers/event/dlb2/pf/dlb2_main.c
index b6ec85b479..717aa4fc08 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -147,7 +147,7 @@ static int dlb2_pf_wait_for_device_ready(struct dlb2_dev *dlb2_dev,
 }
 
 struct dlb2_dev *
-dlb2_probe(struct rte_pci_device *pdev)
+dlb2_probe(struct rte_pci_device *pdev, const void *probe_args)
 {
     struct dlb2_dev *dlb2_dev;
     int ret = 0;
@@ -208,6 +208,10 @@ dlb2_probe(struct rte_pci_device *pdev)
     if (ret)
         goto wait_for_device_ready_fail;
 
+    ret = dlb2_resource_probe(&dlb2_dev->hw, probe_args);
+    if (ret)
+        goto resource_probe_fail;
+
     ret = dlb2_pf_reset(dlb2_dev);
     if (ret)
         goto dlb2_reset_fail;
@@ -216,7 +220,7 @@ dlb2_probe(struct rte_pci_device *pdev)
     if (ret)
         goto init_driver_state_fail;
 
-    ret = dlb2_resource_init(&dlb2_dev->hw, dlb_version);
+    ret = dlb2_resource_init(&dlb2_dev->hw, dlb_version, probe_args);
     if (ret)
         goto resource_init_fail;
 
@@ -227,6 +231,7 @@ dlb2_probe(struct rte_pci_device *pdev)
 init_driver_state_fail:
 dlb2_reset_fail:
 pci_mmap_bad_addr:
+resource_probe_fail:
 wait_for_device_ready_fail:
     rte_free(dlb2_dev);
 dlb2_dev_malloc_fail:
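Editor's note: dlb2_get_pp_allocation() above recovers the enclosing struct dlb2_dev from its embedded struct dlb2_hw member with container_of(), relying on struct dlb2_dev (declared in dlb2_main.h below) embedding the hw object. For readers unfamiliar with the idiom, a generic sketch follows; the types and the CONTAINER_OF macro here are illustrative, not the driver's own definitions:

    #include <stddef.h>

    #define CONTAINER_OF(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct inner {
        int x;
    };

    struct outer {
        int id;
        struct inner in;    /* embedded member, like dlb2_dev::hw */
    };

    /* Given a pointer to the embedded member, recover the outer object. */
    static struct outer *
    outer_from_inner(struct inner *p)
    {
        return CONTAINER_OF(p, struct outer, in);
    }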
diff --git a/drivers/event/dlb2/pf/dlb2_main.h b/drivers/event/dlb2/pf/dlb2_main.h
index 5aa51b1616..4c64d72e9c 100644
--- a/drivers/event/dlb2/pf/dlb2_main.h
+++ b/drivers/event/dlb2/pf/dlb2_main.h
@@ -15,7 +15,11 @@
 #include "base/dlb2_hw_types.h"
 #include "../dlb2_user.h"
 
-#define DLB2_DEFAULT_UNREGISTER_TIMEOUT_S 5
+#define DLB2_EAL_PROBE_CORE 2
+#define DLB2_NUM_PROBE_ENQS 1000
+#define DLB2_HCW_MEM_SIZE 8
+#define DLB2_HCW_64B_OFF 4
+#define DLB2_HCW_ALIGN_MASK 0x3F
 
 struct dlb2_dev;
 
@@ -31,15 +35,30 @@ struct dlb2_dev {
     /* struct list_head list; */
     struct device *dlb2_device;
     bool domain_reset_failed;
+    /* The enqueue_four function enqueues four HCWs (one cache-line worth)
+     * to the HQM, using whichever mechanism is supported by the platform
+     * on which this driver is running.
+     */
+    void (*enqueue_four)(void *qe4, void *pp_addr);
     /* The resource mutex serializes access to driver data structures and
     * hardware registers.
     */
    rte_spinlock_t resource_mutex;
    bool worker_launched;
    u8 revision;
+    u8 version;
 };
 
-struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev);
+struct dlb2_pp_thread_data {
+    struct dlb2_hw *hw;
+    int pp;
+    int cpu;
+    bool is_ldb;
+    int cycles;
+};
+
+struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev, const void *probe_args);
+
 int dlb2_pf_reset(struct dlb2_dev *dlb2_dev);
 int dlb2_pf_create_sched_domain(struct dlb2_hw *hw,
diff --git a/drivers/event/dlb2/pf/dlb2_pf.c b/drivers/event/dlb2/pf/dlb2_pf.c
index 71ac141b66..3d15250e11 100644
--- a/drivers/event/dlb2/pf/dlb2_pf.c
+++ b/drivers/event/dlb2/pf/dlb2_pf.c
@@ -702,6 +702,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
     struct dlb2_devargs dlb2_args = {
         .socket_id = rte_socket_id(),
         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
+        .producer_coremask = NULL,
         .num_dir_credits_override = -1,
         .qid_depth_thresholds = { {0} },
         .poll_interval = DLB2_POLL_INTERVAL_DEFAULT,
@@ -713,6 +714,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
     };
     struct dlb2_eventdev *dlb2;
     int q;
+    const void *probe_args = NULL;
 
     DLB2_LOG_DBG("Enter with dev_id=%d socket_id=%d",
              eventdev->data->dev_id, eventdev->data->socket_id);
@@ -728,16 +730,6 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
     dlb2 = dlb2_pmd_priv(eventdev); /* rte_zmalloc_socket mem */
     dlb2->version = DLB2_HW_DEVICE_FROM_PCI_ID(pci_dev);
 
-    /* Probe the DLB2 PF layer */
-    dlb2->qm_instance.pf_dev = dlb2_probe(pci_dev);
-
-    if (dlb2->qm_instance.pf_dev == NULL) {
-        DLB2_LOG_ERR("DLB2 PF Probe failed with error %d\n",
-                 rte_errno);
-        ret = -rte_errno;
-        goto dlb2_probe_failed;
-    }
-
     /* Were we invoked with runtime parameters? */
     if (pci_dev->device.devargs) {
         ret = dlb2_parse_params(pci_dev->device.devargs->args,
@@ -749,6 +741,17 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)
                      ret, rte_errno);
             goto dlb2_probe_failed;
         }
+        probe_args = &dlb2_args;
+    }
+
+    /* Probe the DLB2 PF layer */
+    dlb2->qm_instance.pf_dev = dlb2_probe(pci_dev, probe_args);
+
+    if (dlb2->qm_instance.pf_dev == NULL) {
+        DLB2_LOG_ERR("DLB2 PF Probe failed with error %d\n",
+                 rte_errno);
+        ret = -rte_errno;
+        goto dlb2_probe_failed;
     }
 
     ret = dlb2_primary_eventdev_probe(eventdev,