common/dpaax: add library for PA/VA translation table

A common library, valid for dpaaX drivers, which is used to maintain
a local copy of PA->VA translations.

In case of physical addressing mode (one of the option for FSLMC, and
only option for DPAA bus), the addresses of descriptors Rx'd are
physical. These need to be converted into equivalent VA for rte_mbuf
and other similar calls.

Using the rte_mem_virt2iova or rte_mem_virt2phy is expensive. This
library is an attempt to reduce the overall cost associated with
this translation.

A small table is maintained, containing continuous entries
representing a continguous physical range. Each of these entries
stores the equivalent VA, which is fed during mempool creation, or
memory allocation/deallocation callbacks.

Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
This commit is contained in:
Shreyansh Jain 2018-10-15 17:31:54 +05:30 committed by Thomas Monjalon
parent e74a15694e
commit 2f3d633aa5
11 changed files with 672 additions and 1 deletions

View File

@ -428,6 +428,7 @@ F: drivers/bus/ifpga/
NXP buses
M: Hemant Agrawal <hemant.agrawal@nxp.com>
M: Shreyansh Jain <shreyansh.jain@nxp.com>
F: drivers/common/dpaax/
F: drivers/bus/dpaa/
F: drivers/bus/fslmc/

View File

@ -139,6 +139,11 @@ CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
#
CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n
#
# Common libraries, before Bus/PMDs
#
CONFIG_RTE_LIBRTE_COMMON_DPAAX=n
#
# Compile the Intel FPGA bus
#

View File

@ -29,6 +29,11 @@ CONFIG_RTE_PROC_INFO=y
CONFIG_RTE_LIBRTE_VMBUS=y
CONFIG_RTE_LIBRTE_NETVSC_PMD=y
#
# Common libraries, before Bus/PMDs
#
CONFIG_RTE_LIBRTE_COMMON_DPAAX=y
# NXP DPAA BUS and drivers
CONFIG_RTE_LIBRTE_DPAA_BUS=y
CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=y

View File

@ -14,4 +14,8 @@ ifneq (,$(findstring y,$(MVEP-y)))
DIRS-y += mvep
endif
ifeq ($(CONFIG_RTE_LIBRTE_COMMON_DPAAX),y)
DIRS-y += dpaax
endif
include $(RTE_SDK)/mk/rte.subdir.mk

View File

@ -0,0 +1,31 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2018 NXP
#
include $(RTE_SDK)/mk/rte.vars.mk
#
# library name
#
LIB = librte_common_dpaax.a
CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
# versioning export map
EXPORT_MAP := rte_common_dpaax_version.map
# library version
LIBABIVER := 1
#
# all source are stored in SRCS-y
#
SRCS-y += dpaax_iova_table.c
LDLIBS += -lrte_eal
SYMLINK-y-include += dpaax_iova_table.h
include $(RTE_SDK)/mk/rte.lib.mk

View File

@ -0,0 +1,461 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2018 NXP
*/
#include <rte_memory.h>
#include "dpaax_iova_table.h"
#include "dpaax_logs.h"
/* Global dpaax logger identifier */
int dpaax_logger;
/* Global table reference */
struct dpaax_iova_table *dpaax_iova_table_p;
static int dpaax_handle_memevents(void);
/* A structure representing the device-tree node available in /proc/device-tree.
*/
struct reg_node {
phys_addr_t addr;
size_t len;
};
/* A ntohll equivalent routine
* XXX: This is only applicable for 64 bit environment.
*/
static void
rotate_8(unsigned char *arr)
{
uint32_t temp;
uint32_t *first_half;
uint32_t *second_half;
first_half = (uint32_t *)(arr);
second_half = (uint32_t *)(arr + 4);
temp = *first_half;
*first_half = *second_half;
*second_half = temp;
*first_half = ntohl(*first_half);
*second_half = ntohl(*second_half);
}
/* read_memory_nodes
* Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
* are populated by Uboot and available in device tree:
* /proc/device-tree/memory@<address>/reg <= register.
* Entries are of the form:
* (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
*
* @param count
* OUT populate number of entries found in memory node
* @return
* Pointer to array of reg_node elements, count size
*/
static struct reg_node *
read_memory_node(unsigned int *count)
{
int fd, ret, i;
unsigned int j;
glob_t result = {0};
struct stat statbuf = {0};
char file_data[MEM_NODE_FILE_LEN];
struct reg_node *nodes = NULL;
*count = 0;
ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
if (ret != 0) {
DPAAX_ERR("Unable to glob device-tree memory node: (%s)(%d)",
MEM_NODE_PATH_GLOB, ret);
goto out;
}
if (result.gl_pathc != 1) {
/* Either more than one memory@<addr> node found, or none.
* In either case, cannot work ahead.
*/
DPAAX_ERR("Found (%zu) entries in device-tree. Not supported!",
result.gl_pathc);
goto out;
}
DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
result.gl_pathv[0]);
fd = open(result.gl_pathv[0], O_RDONLY);
if (fd < 0) {
DPAAX_ERR("Unable to open the device-tree node: (%s)(fd=%d)",
MEM_NODE_PATH_GLOB, fd);
goto cleanup;
}
/* Stat to get the file size */
ret = fstat(fd, &statbuf);
if (ret != 0) {
DPAAX_ERR("Unable to get device-tree memory node size.");
goto cleanup;
}
DPAAX_DEBUG("Size of device-tree mem node: %lu", statbuf.st_size);
if (statbuf.st_size > MEM_NODE_FILE_LEN) {
DPAAX_WARN("More memory nodes available than assumed.");
DPAAX_WARN("System may not work properly!");
}
ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
MEM_NODE_FILE_LEN : statbuf.st_size);
if (ret <= 0) {
DPAAX_ERR("Unable to read device-tree memory node: (%d)", ret);
goto cleanup;
}
/* The reg node should be multiple of 16 bytes, 8 bytes each for addr
* and len.
*/
*count = (statbuf.st_size / 16);
if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
DPAAX_ERR("Invalid memory node values or count. (size=%lu)",
statbuf.st_size);
goto cleanup;
}
/* each entry is of 16 bytes, and size/16 is total count of entries */
nodes = malloc(sizeof(struct reg_node) * (*count));
if (!nodes) {
DPAAX_ERR("Failure in allocating working memory.");
goto cleanup;
}
memset(nodes, 0, sizeof(struct reg_node) * (*count));
for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) {
memcpy(&nodes[j], file_data + i, 16);
/* Rotate (ntohl) each 8 byte entry */
rotate_8((unsigned char *)(&(nodes[j].addr)));
rotate_8((unsigned char *)(&(nodes[j].len)));
}
DPAAX_DEBUG("Device-tree memory node data:");
do {
DPAAX_DEBUG("\n %08" PRIx64 " %08zu", nodes[j].addr, nodes[j].len);
} while (--j);
cleanup:
close(fd);
globfree(&result);
out:
return nodes;
}
int
dpaax_iova_table_populate(void)
{
int ret;
unsigned int i, node_count;
size_t tot_memory_size, total_table_size;
struct reg_node *nodes;
struct dpaax_iovat_element *entry;
/* dpaax_iova_table_p is a singleton - only one instance should be
* created.
*/
if (dpaax_iova_table_p) {
DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
dpaax_iova_table_p);
/* This can be an error case as well - some path not cleaning
* up table - but, for now, it is assumed that if IOVA Table
* pointer is valid, table is allocated.
*/
return 0;
}
nodes = read_memory_node(&node_count);
if (nodes == NULL || node_count <= 0) {
DPAAX_WARN("PA->VA translation not available;");
DPAAX_WARN("Expect performance impact.");
return -1;
}
tot_memory_size = 0;
for (i = 0; i < node_count; i++)
tot_memory_size += nodes[i].len;
DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);
/* Total table size = meta data + tot_memory_size/8 */
total_table_size = sizeof(struct dpaax_iova_table) +
(sizeof(struct dpaax_iovat_element) * node_count) +
((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));
/* TODO: This memory doesn't need to shared but needs to be always
* pinned to RAM (no swap out) - using hugepage rather than malloc
*/
dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
if (dpaax_iova_table_p == NULL) {
DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
DPAAX_WARN("PA->VA translation not available;");
DPAAX_WARN("Expect performance impact.");
free(nodes);
return -1;
}
/* Initialize table */
dpaax_iova_table_p->count = node_count;
entry = dpaax_iova_table_p->entries;
DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
DPAAX_DEBUG("\t(entry),(start),(len),(next)");
for (i = 0; i < node_count; i++) {
/* dpaax_iova_table_p
* | dpaax_iova_table_p->entries
* | |
* | |
* V V
* +------+------+-------+---+----------+---------+---
* |iova_ |entry | entry | | pages | pages |
* |table | 1 | 2 |...| entry 1 | entry2 |
* +-----'+.-----+-------+---+;---------+;--------+---
* \ \ / /
* `~~~~~~|~~~~~>pages /
* \ /
* `~~~~~~~~~~~>pages
*/
entry[i].start = nodes[i].addr;
entry[i].len = nodes[i].len;
if (i > 0)
entry[i].pages = entry[i-1].pages +
((entry[i-1].len/DPAAX_MEM_SPLIT));
else
entry[i].pages = (uint64_t *)((unsigned char *)entry +
(sizeof(struct dpaax_iovat_element) *
node_count));
DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
i, entry[i].start, entry[i].len, entry[i].pages);
}
/* Release memory associated with nodes array - not required now */
free(nodes);
DPAAX_DEBUG("Adding mem-event handler\n");
ret = dpaax_handle_memevents();
if (ret) {
DPAAX_ERR("Unable to add mem-event handler");
DPAAX_WARN("Cases with non-buffer pool mem won't work!");
}
return 0;
}
void
dpaax_iova_table_depopulate(void)
{
if (dpaax_iova_table_p == NULL)
return;
rte_free(dpaax_iova_table_p->entries);
dpaax_iova_table_p = NULL;
DPAAX_DEBUG("IOVA Table cleanedup");
}
int
dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
{
int found = 0;
unsigned int i;
size_t req_length = length, e_offset;
struct dpaax_iovat_element *entry;
uintptr_t align_vaddr;
phys_addr_t align_paddr;
align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
/* Check if paddr is available in table */
entry = dpaax_iova_table_p->entries;
for (i = 0; i < dpaax_iova_table_p->count; i++) {
if (align_paddr < entry[i].start) {
/* Address lower than start, but not found in previous
* iteration shouldn't exist.
*/
DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
"(%"PRIu64")", paddr);
DPAAX_ERR("Add: Lowest address: %"PRIu64"",
entry[i].start);
return -1;
}
if (align_paddr > (entry[i].start + entry[i].len))
continue;
/* align_paddr >= start && align_paddr < (start + len) */
found = 1;
do {
e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
/* TODO: Whatif something already exists at this
* location - is that an error? For now, ignoring the
* case.
*/
entry[i].pages[e_offset] = align_vaddr;
DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
" remaining len %zu", align_vaddr,
align_paddr, e_offset, req_length);
/* Incoming request can be larger than the
* DPAAX_MEM_SPLIT size - in which case, multiple
* entries in entry->pages[] are filled up.
*/
if (req_length <= DPAAX_MEM_SPLIT)
break;
align_paddr += DPAAX_MEM_SPLIT;
align_vaddr += DPAAX_MEM_SPLIT;
req_length -= DPAAX_MEM_SPLIT;
} while (1);
break;
}
if (!found) {
/* There might be case where the incoming physical address is
* beyond the address discovered in the memory node of
* device-tree. Specially if some malloc'd area is used by EAL
* and the memevent handlers passes that across. But, this is
* not necessarily an error.
*/
DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
" phy(%"PRIu64")",
vaddr, paddr);
return -1;
}
DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
" phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
vaddr, paddr, length);
return 0;
}
/* dpaax_iova_table_dump
* Dump the table, with its entries, on screen. Only works in Debug Mode
* Not for weak hearted - the tables can get quite large
*/
void
dpaax_iova_table_dump(void)
{
unsigned int i, j;
struct dpaax_iovat_element *entry;
/* In case DEBUG is not enabled, some 'if' conditions might misbehave
* as they have nothing else in them except a DPAAX_DEBUG() which if
* tuned out would leave 'if' naked.
*/
if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
DPAAX_ERR("Set log level to Debug for PA->Table dump!");
return;
}
DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
if (dpaax_iova_table_p == NULL)
DPAAX_DEBUG("\tNULL");
entry = dpaax_iova_table_p->entries;
for (i = 0; i < dpaax_iova_table_p->count; i++) {
DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
i, entry[i].start, entry[i].len, entry[i].pages);
DPAAX_DEBUG("\t\t (PA), (VA)");
for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) {
if (entry[i].pages[j] == 0)
continue;
DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
(entry[i].start + (j * sizeof(uint64_t))),
entry[i].pages[j]);
}
}
DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
}
static void
dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
void *arg __rte_unused)
{
struct rte_memseg_list *msl;
struct rte_memseg *ms;
size_t cur_len = 0, map_len = 0;
phys_addr_t phys_addr;
void *virt_addr;
int ret;
DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
msl = rte_mem_virt2memseg_list(addr);
while (cur_len < len) {
const void *va = RTE_PTR_ADD(addr, cur_len);
ms = rte_mem_virt2memseg(va, msl);
phys_addr = rte_mem_virt2phy(ms->addr);
virt_addr = ms->addr;
map_len = ms->len;
DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
"iova=%"PRIu64", map_len=%zu",
type == RTE_MEM_EVENT_ALLOC ?
"alloc" : "dealloc",
va, virt_addr, phys_addr, map_len);
if (type == RTE_MEM_EVENT_ALLOC)
ret = dpaax_iova_table_update(phys_addr, virt_addr,
map_len);
else
/* In case of mem_events for MEM_EVENT_FREE, complete
* hugepage is released and its PA entry is set to 0.
*/
ret = dpaax_iova_table_update(phys_addr, 0, map_len);
if (ret != 0) {
DPAAX_ERR("PA-Table entry update failed. "
"Map=%d, addr=%p, len=%zu, err:(%d)",
type, va, map_len, ret);
return;
}
cur_len += map_len;
}
}
static int
dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
const struct rte_memseg *ms, size_t len,
void *arg __rte_unused)
{
DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
ms->addr, ms->phys_addr, len);
dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
return 0;
}
static int
dpaax_handle_memevents(void)
{
/* First, walk through all memsegs and pin them, before installing
* handler. This assures that all memseg which have already been
* identified/allocated by EAL, are already part of PA->VA Table. This
* is especially for cases where application allocates memory before
* the EAL or this is an externally allocated memory passed to EAL.
*/
rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
return rte_mem_event_callback_register("dpaax_memevents_cb",
dpaax_memevent_cb, NULL);
}
RTE_INIT(dpaax_log)
{
dpaax_logger = rte_log_register("pmd.common.dpaax");
if (dpaax_logger >= 0)
rte_log_set_level(dpaax_logger, RTE_LOG_NOTICE);
}

View File

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2018 NXP
*/
#ifndef _DPAAX_IOVA_TABLE_H_
#define _DPAAX_IOVA_TABLE_H_
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <glob.h>
#include <errno.h>
#include <arpa/inet.h>
#include <rte_eal.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
#include <rte_malloc.h>
struct dpaax_iovat_element {
phys_addr_t start; /**< Start address of block of physical pages */
size_t len; /**< Difference of end-start for quick access */
uint64_t *pages; /**< VA for each physical page in this block */
};
struct dpaax_iova_table {
unsigned int count; /**< No. of blocks of contiguous physical pages */
struct dpaax_iovat_element entries[0];
};
/* Pointer to the table, which is common for DPAA/DPAA2 and only a single
* instance is required across net/crypto/event drivers. This table is
* populated iff devices are found on the bus.
*/
extern struct dpaax_iova_table *dpaax_iova_table_p;
/* Device tree file for memory layout is named 'memory@<addr>' where the 'addr'
* is SoC dependent, or even Uboot fixup dependent.
*/
#define MEM_NODE_PATH_GLOB "/proc/device-tree/memory[@0-9]*/reg"
/* Device file should be multiple of 16 bytes, each containing 8 byte of addr
* and its length. Assuming max of 5 entries.
*/
#define MEM_NODE_FILE_LEN ((16 * 5) + 1)
/* Table is made up of DPAAX_MEM_SPLIT elements for each contiguous zone. This
* helps avoid separate handling for cases where more than one size of hugepage
* is supported.
*/
#define DPAAX_MEM_SPLIT (1<<21)
#define DPAAX_MEM_SPLIT_MASK ~(DPAAX_MEM_SPLIT - 1) /**< Floor aligned */
#define DPAAX_MEM_SPLIT_MASK_OFF (DPAAX_MEM_SPLIT - 1) /**< Offset */
/* APIs exposed */
int dpaax_iova_table_populate(void);
void dpaax_iova_table_depopulate(void);
int dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length);
void dpaax_iova_table_dump(void);
static inline void *dpaax_iova_table_get_va(phys_addr_t paddr) __attribute__((hot));
static inline void *
dpaax_iova_table_get_va(phys_addr_t paddr) {
unsigned int i = 0, index;
void *vaddr = 0;
phys_addr_t paddr_align = paddr & DPAAX_MEM_SPLIT_MASK;
size_t offset = paddr & DPAAX_MEM_SPLIT_MASK_OFF;
struct dpaax_iovat_element *entry;
entry = dpaax_iova_table_p->entries;
do {
if (unlikely(i > dpaax_iova_table_p->count))
break;
if (paddr_align < entry[i].start) {
/* Incorrect paddr; Not in memory range */
return NULL;
}
if (paddr_align > (entry[i].start + entry[i].len)) {
i++;
continue;
}
/* paddr > entry->start && paddr <= entry->(start+len) */
index = (paddr_align - entry[i].start)/DPAAX_MEM_SPLIT;
vaddr = (void *)((uintptr_t)entry[i].pages[index] + offset);
break;
} while (1);
return vaddr;
}
#endif /* _DPAAX_IOVA_TABLE_H_ */

View File

@ -0,0 +1,39 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2018 NXP
*/
#ifndef _DPAAX_LOGS_H_
#define _DPAAX_LOGS_H_
#include <rte_log.h>
extern int dpaax_logger;
#define DPAAX_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, dpaax_logger, "dpaax: " fmt "\n", \
##args)
/* Debug logs are with Function names */
#define DPAAX_DEBUG(fmt, args...) \
rte_log(RTE_LOG_DEBUG, dpaax_logger, "dpaax: %s(): " fmt "\n", \
__func__, ##args)
#define DPAAX_INFO(fmt, args...) \
DPAAX_LOG(INFO, fmt, ## args)
#define DPAAX_ERR(fmt, args...) \
DPAAX_LOG(ERR, fmt, ## args)
#define DPAAX_WARN(fmt, args...) \
DPAAX_LOG(WARNING, fmt, ## args)
/* DP Logs, toggled out at compile time if level lower than current level */
#define DPAAX_DP_LOG(level, fmt, args...) \
RTE_LOG_DP(level, PMD, fmt, ## args)
#define DPAAX_DP_DEBUG(fmt, args...) \
DPAAX_DP_LOG(DEBUG, fmt, ## args)
#define DPAAX_DP_INFO(fmt, args...) \
DPAAX_DP_LOG(INFO, fmt, ## args)
#define DPAAX_DP_WARN(fmt, args...) \
DPAAX_DP_LOG(WARNING, fmt, ## args)
#endif /* _DPAAX_LOGS_H_ */

View File

@ -0,0 +1,12 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2018 NXP
allow_experimental_apis = true
if host_machine.system() != 'linux'
build = false
endif
sources = files('dpaax_iova_table.c')
cflags += ['-D_GNU_SOURCE']

View File

@ -0,0 +1,11 @@
DPDK_18.11 {
global:
dpaax_iova_table_update;
dpaax_iova_table_depopulate;
dpaax_iova_table_dump;
dpaax_iova_table_p;
dpaax_iova_table_populate;
local: *;
};

View File

@ -2,6 +2,6 @@
# Copyright(c) 2018 Cavium, Inc
std_deps = ['eal']
drivers = ['mvep', 'octeontx', 'qat']
drivers = ['dpaax', 'mvep', 'octeontx', 'qat']
config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
driver_name_fmt = 'rte_common_@0@'