From 365fb925d3b39e10b781e0662c2b57e8303b2147 Mon Sep 17 00:00:00 2001
From: Shreyansh Jain
Date: Fri, 27 Apr 2018 22:50:57 +0530
Subject: [PATCH] bus/fslmc: optimize physical to virtual address search

With memory hotplugging support, memsegs are now virtually contiguous
rather than physically contiguous. The FSLMC bus and dpaa2 drivers
depend on PA-to-VA address conversion when running in physical
addressing mode.

This patch creates a list of the memory blocks pinned to the DPAA2
mempool. A physical address being looked up is expected to belong to
this list (the buffers come from the hardware pool), so searching it
is less expensive than a full memseg walk. Even so, there is a
marginal performance impact compared to the legacy mode with
physically contiguous memsegs.

Signed-off-by: Shreyansh Jain
Signed-off-by: Thomas Monjalon
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h        | 23 ++++++++++
 drivers/event/dpaa2/Makefile                   |  3 +-
 drivers/mempool/dpaa2/dpaa2_hw_mempool.c       | 43 +++++++++++++++++++
 .../dpaa2/rte_mempool_dpaa2_version.map        |  1 +
 4 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index c76393d45e..e081afbacb 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -254,15 +254,38 @@ enum qbman_fd_format {
  */
 #define DPAA2_EQ_RESP_ALWAYS		1
 
+/* Various structures representing contiguous memory maps */
+struct dpaa2_memseg {
+	TAILQ_ENTRY(dpaa2_memseg) next;
+	char *vaddr;
+	rte_iova_t iova;
+	size_t len;
+};
+
+TAILQ_HEAD(dpaa2_memseg_list, dpaa2_memseg);
+extern struct dpaa2_memseg_list rte_dpaa2_memsegs;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 extern uint8_t dpaa2_virt_mode;
 static void *dpaa2_mem_ptov(phys_addr_t paddr) __attribute__((unused));
 /* todo - this is costly, need to write a fast coversion routine */
 static void *dpaa2_mem_ptov(phys_addr_t paddr)
 {
+	struct dpaa2_memseg *ms;
+
 	if (dpaa2_virt_mode)
 		return (void *)(size_t)paddr;
 
+	/* Check if the address is already part of the memseg list internally
+	 * maintained by the dpaa2 driver.
+	 */
+	TAILQ_FOREACH(ms, &rte_dpaa2_memsegs, next) {
+		if (paddr >= ms->iova && paddr <
+			ms->iova + ms->len)
+			return RTE_PTR_ADD(ms->vaddr, (uintptr_t)(paddr - ms->iova));
+	}
+
+	/* If not, Fallback to full memseg list searching */
 	return rte_mem_iova2virt(paddr);
 }
 
diff --git a/drivers/event/dpaa2/Makefile b/drivers/event/dpaa2/Makefile
index a5b68b4fa6..5e1a632009 100644
--- a/drivers/event/dpaa2/Makefile
+++ b/drivers/event/dpaa2/Makefile
@@ -18,7 +18,8 @@ CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/portal
 CFLAGS += -I$(RTE_SDK)/drivers/mempool/dpaa2
 CFLAGS += -I$(RTE_SDK)/drivers/event/dpaa2
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
-LDLIBS += -lrte_eal -lrte_eventdev -lrte_bus_fslmc -lrte_pmd_dpaa2
+LDLIBS += -lrte_eal -lrte_eventdev
+LDLIBS += -lrte_bus_fslmc -lrte_mempool_dpaa2 -lrte_pmd_dpaa2
 LDLIBS += -lrte_bus_vdev
 CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2
 CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/mc
diff --git a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
index ce7a4c577f..5d057fb4a4 100644
--- a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
+++ b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c
@@ -32,6 +32,13 @@ struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID];
 
 static struct dpaa2_bp_list *h_bp_list;
 
+/* List of all the memseg information locally maintained in dpaa2 driver. This
+ * is to optimize the PA_to_VA searches until a better mechanism (algo) is
+ * available.
+ */
+struct dpaa2_memseg_list rte_dpaa2_memsegs
+	= TAILQ_HEAD_INITIALIZER(rte_dpaa2_memsegs);
+
 /* Dynamic logging identified for mempool */
 int dpaa2_logtype_mempool;
 
@@ -358,6 +365,41 @@ rte_hw_mbuf_get_count(const struct rte_mempool *mp)
 	return num_of_bufs;
 }
 
+static int
+dpaa2_populate(struct rte_mempool *mp, unsigned int max_objs,
+	       void *vaddr, rte_iova_t paddr, size_t len,
+	       rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
+{
+	struct dpaa2_memseg *ms;
+
+	/* For each memory chunk pinned to the Mempool, a linked list of the
+	 * contained memsegs is created for searching when PA to VA
+	 * conversion is required.
+	 */
+	ms = rte_zmalloc(NULL, sizeof(struct dpaa2_memseg), 0);
+	if (!ms) {
+		DPAA2_MEMPOOL_ERR("Unable to allocate internal memory.");
+		DPAA2_MEMPOOL_WARN("Fast Physical to Virtual Addr translation would not be available.");
+		/* If the element is not added, it would only lead to failure
+		 * in searching for the element and the logic would Fallback
+		 * to traditional DPDK memseg traversal code. So, this is not
+		 * a blocking error - but, error would be printed on screen.
+		 */
+		return 0;
+	}
+
+	ms->vaddr = vaddr;
+	ms->iova = paddr;
+	ms->len = len;
+	/* Head insertions are generally faster than tail insertions as the
+	 * buffers pinned are picked from rear end.
+	 */
+	TAILQ_INSERT_HEAD(&rte_dpaa2_memsegs, ms, next);
+
+	return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
+					       obj_cb, obj_cb_arg);
+}
+
 struct rte_mempool_ops dpaa2_mpool_ops = {
 	.name = DPAA2_MEMPOOL_OPS_NAME,
 	.alloc = rte_hw_mbuf_create_pool,
@@ -365,6 +407,7 @@ struct rte_mempool_ops dpaa2_mpool_ops = {
 	.enqueue = rte_hw_mbuf_free_bulk,
 	.dequeue = rte_dpaa2_mbuf_alloc_bulk,
 	.get_count = rte_hw_mbuf_get_count,
+	.populate = dpaa2_populate,
 };
 
 MEMPOOL_REGISTER_OPS(dpaa2_mpool_ops);
diff --git a/drivers/mempool/dpaa2/rte_mempool_dpaa2_version.map b/drivers/mempool/dpaa2/rte_mempool_dpaa2_version.map
index a8aa685c37..82a5ec06d1 100644
--- a/drivers/mempool/dpaa2/rte_mempool_dpaa2_version.map
+++ b/drivers/mempool/dpaa2/rte_mempool_dpaa2_version.map
@@ -3,6 +3,7 @@ DPDK_17.05 {
 
 	rte_dpaa2_bpid_info;
 	rte_dpaa2_mbuf_alloc_bulk;
+	rte_dpaa2_memsegs;
 
 	local: *;
 };
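
The scheme the patch relies on -- record each pinned block at populate time, then resolve PA to VA by scanning that short driver-local list before falling back to the full memseg walk -- is illustrated by the minimal, self-contained C sketch below. It is a sketch only: it uses sys/queue.h and plain C types, and the names mem_range, ptov_cache_add() and ptov_cache_lookup() are illustrative stand-ins, not DPDK APIs (rte_zmalloc, rte_iova_t and rte_mem_iova2virt are replaced here by calloc, uint64_t and a NULL return).

/* Standalone sketch of the PA->VA lookup list; not DPDK code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct mem_range {
	TAILQ_ENTRY(mem_range) next;
	char *vaddr;      /* virtual base of the pinned block */
	uint64_t iova;    /* bus/physical base of the same block */
	size_t len;       /* length of the block in bytes */
};

TAILQ_HEAD(mem_range_list, mem_range);
static struct mem_range_list ptov_cache =
	TAILQ_HEAD_INITIALIZER(ptov_cache);

/* Record a block when it is handed to the pool (mirrors dpaa2_populate()). */
static void ptov_cache_add(char *vaddr, uint64_t iova, size_t len)
{
	struct mem_range *r = calloc(1, sizeof(*r));

	if (r == NULL)
		return; /* non-fatal: lookups simply fall back */
	r->vaddr = vaddr;
	r->iova = iova;
	r->len = len;
	/* Head insertion: recently pinned blocks are the likeliest hits. */
	TAILQ_INSERT_HEAD(&ptov_cache, r, next);
}

/* PA->VA: scan the short list first (mirrors dpaa2_mem_ptov()). */
static void *ptov_cache_lookup(uint64_t iova)
{
	struct mem_range *r;

	TAILQ_FOREACH(r, &ptov_cache, next) {
		if (iova >= r->iova && iova < r->iova + r->len)
			return r->vaddr + (iova - r->iova);
	}
	return NULL; /* caller falls back to the full memseg walk */
}

int main(void)
{
	static char block[4096];

	/* Pretend this block's bus address is 0x10000000. */
	ptov_cache_add(block, 0x10000000, sizeof(block));
	printf("hit: %p, miss: %p\n",
	       ptov_cache_lookup(0x10000800),
	       ptov_cache_lookup(0x20000000));
	return 0;
}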
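Two design choices in the patch are worth noting. New entries are inserted at the head of the list because the most recently pinned blocks are the ones most likely to be translated next (buffers are picked from the rear end of the pool). And a failed rte_zmalloc() in dpaa2_populate() is deliberately non-fatal: the entry is simply not cached, and dpaa2_mem_ptov() falls back to rte_mem_iova2virt(), the regular memseg traversal, at the cost of slower translation.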