By default on 32-bit systems, file offsets are given as 32-bit values which prevents support for large files. While this is unlikely to be a problem, enabling large file support globally makes "make" and "meson" builds consistent, since meson always enables large file support, and without this change, the size of "struct stat" fields will be different between the two builds. The only location where this appears to be significant is in the dpaax common code, where a printf needs to be updated for 32-bit builds. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> Acked-by: Luca Boccassi <bluca@debian.org> Acked-by: Shreyansh Jain <shreyansh.jain@nxp.com>
466 lines
13 KiB
C
466 lines
13 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright 2018 NXP
|
|
*/
|
|
|
|
#include <rte_memory.h>
|
|
|
|
#include "dpaax_iova_table.h"
|
|
#include "dpaax_logs.h"
|
|
|
|
/* Log type identifier used by the dpaax logging macros */
int dpaax_logger;

/* Process-wide reference to the PA->VA translation table; NULL while no
 * table has been populated.
 */
struct dpaax_iova_table *dpaax_iova_table_p;

/* Defined later in this file; declared here for the populate routine. */
static int dpaax_handle_memevents(void);
|
|
|
|
/* A structure representing the device-tree node available in /proc/device-tree.
|
|
*/
|
|
struct reg_node {
|
|
phys_addr_t addr;
|
|
size_t len;
|
|
};
|
|
|
|
/* A ntohll equivalent routine: convert, in place, the 8 bytes at 'arr' from
 * network (big-endian) byte order into a host-order 64-bit value.
 *
 * The value is assembled byte by byte and stored back with memcpy(), so the
 * buffer does not have to be aligned and is never accessed through an
 * incompatible pointer type. On big-endian hosts this is a no-op, as a
 * ntohll is expected to be.
 *
 * XXX: This is only applicable for 64 bit environment - exactly 8 bytes are
 * read from and written to 'arr'.
 */
static void
rotate_8(unsigned char *arr)
{
	uint64_t host_val = 0;
	unsigned int i;

	/* arr[0] is the most significant byte in network order */
	for (i = 0; i < sizeof(host_val); i++)
		host_val = (host_val << 8) | arr[i];

	memcpy(arr, &host_val, sizeof(host_val));
}
|
|
|
|
/* read_memory_nodes
|
|
* Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
|
|
* are populated by Uboot and available in device tree:
|
|
* /proc/device-tree/memory@<address>/reg <= register.
|
|
* Entries are of the form:
|
|
* (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
|
|
*
|
|
* @param count
|
|
* OUT populate number of entries found in memory node
|
|
* @return
|
|
* Pointer to array of reg_node elements, count size
|
|
*/
|
|
static struct reg_node *
|
|
read_memory_node(unsigned int *count)
|
|
{
|
|
int fd, ret, i;
|
|
unsigned int j;
|
|
glob_t result = {0};
|
|
struct stat statbuf = {0};
|
|
char file_data[MEM_NODE_FILE_LEN];
|
|
struct reg_node *nodes = NULL;
|
|
|
|
*count = 0;
|
|
|
|
ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
|
|
if (ret != 0) {
|
|
DPAAX_DEBUG("Unable to glob device-tree memory node: (%s)(%d)",
|
|
MEM_NODE_PATH_GLOB, ret);
|
|
goto out;
|
|
}
|
|
|
|
if (result.gl_pathc != 1) {
|
|
/* Either more than one memory@<addr> node found, or none.
|
|
* In either case, cannot work ahead.
|
|
*/
|
|
DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
|
|
result.gl_pathc);
|
|
goto out;
|
|
}
|
|
|
|
DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
|
|
result.gl_pathv[0]);
|
|
fd = open(result.gl_pathv[0], O_RDONLY);
|
|
if (fd < 0) {
|
|
DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
|
|
MEM_NODE_PATH_GLOB, fd);
|
|
goto cleanup;
|
|
}
|
|
|
|
/* Stat to get the file size */
|
|
ret = fstat(fd, &statbuf);
|
|
if (ret != 0) {
|
|
DPAAX_DEBUG("Unable to get device-tree memory node size.");
|
|
goto cleanup;
|
|
}
|
|
|
|
DPAAX_DEBUG("Size of device-tree mem node: %" PRIu64, statbuf.st_size);
|
|
if (statbuf.st_size > MEM_NODE_FILE_LEN) {
|
|
DPAAX_DEBUG("More memory nodes available than assumed.");
|
|
DPAAX_DEBUG("System may not work properly!");
|
|
}
|
|
|
|
ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
|
|
MEM_NODE_FILE_LEN : statbuf.st_size);
|
|
if (ret <= 0) {
|
|
DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
|
|
ret);
|
|
goto cleanup;
|
|
}
|
|
|
|
/* The reg node should be multiple of 16 bytes, 8 bytes each for addr
|
|
* and len.
|
|
*/
|
|
*count = (statbuf.st_size / 16);
|
|
if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
|
|
DPAAX_DEBUG("Invalid memory node values or count. (size=%" PRIu64 ")",
|
|
statbuf.st_size);
|
|
goto cleanup;
|
|
}
|
|
|
|
/* each entry is of 16 bytes, and size/16 is total count of entries */
|
|
nodes = malloc(sizeof(struct reg_node) * (*count));
|
|
if (!nodes) {
|
|
DPAAX_DEBUG("Failure in allocating working memory.");
|
|
goto cleanup;
|
|
}
|
|
memset(nodes, 0, sizeof(struct reg_node) * (*count));
|
|
|
|
for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) {
|
|
memcpy(&nodes[j], file_data + i, 16);
|
|
/* Rotate (ntohl) each 8 byte entry */
|
|
rotate_8((unsigned char *)(&(nodes[j].addr)));
|
|
rotate_8((unsigned char *)(&(nodes[j].len)));
|
|
}
|
|
|
|
DPAAX_DEBUG("Device-tree memory node data:");
|
|
do {
|
|
DPAAX_DEBUG("\n %08" PRIx64 " %08zu", nodes[j].addr, nodes[j].len);
|
|
} while (--j);
|
|
|
|
cleanup:
|
|
close(fd);
|
|
globfree(&result);
|
|
out:
|
|
return nodes;
|
|
}
|
|
|
|
int
|
|
dpaax_iova_table_populate(void)
|
|
{
|
|
int ret;
|
|
unsigned int i, node_count;
|
|
size_t tot_memory_size, total_table_size;
|
|
struct reg_node *nodes;
|
|
struct dpaax_iovat_element *entry;
|
|
|
|
/* dpaax_iova_table_p is a singleton - only one instance should be
|
|
* created.
|
|
*/
|
|
if (dpaax_iova_table_p) {
|
|
DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
|
|
dpaax_iova_table_p);
|
|
/* This can be an error case as well - some path not cleaning
|
|
* up table - but, for now, it is assumed that if IOVA Table
|
|
* pointer is valid, table is allocated.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
nodes = read_memory_node(&node_count);
|
|
if (nodes == NULL) {
|
|
DPAAX_WARN("PA->VA translation not available;");
|
|
DPAAX_WARN("Expect performance impact.");
|
|
return -1;
|
|
}
|
|
|
|
tot_memory_size = 0;
|
|
for (i = 0; i < node_count; i++)
|
|
tot_memory_size += nodes[i].len;
|
|
|
|
DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);
|
|
|
|
/* Total table size = meta data + tot_memory_size/8 */
|
|
total_table_size = sizeof(struct dpaax_iova_table) +
|
|
(sizeof(struct dpaax_iovat_element) * node_count) +
|
|
((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));
|
|
|
|
/* TODO: This memory doesn't need to shared but needs to be always
|
|
* pinned to RAM (no swap out) - using hugepage rather than malloc
|
|
*/
|
|
dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
|
|
if (dpaax_iova_table_p == NULL) {
|
|
DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
|
|
DPAAX_WARN("PA->VA translation not available;");
|
|
DPAAX_WARN("Expect performance impact.");
|
|
free(nodes);
|
|
return -1;
|
|
}
|
|
|
|
/* Initialize table */
|
|
dpaax_iova_table_p->count = node_count;
|
|
entry = dpaax_iova_table_p->entries;
|
|
|
|
DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
|
|
DPAAX_DEBUG("\t(entry),(start),(len),(next)");
|
|
|
|
for (i = 0; i < node_count; i++) {
|
|
/* dpaax_iova_table_p
|
|
* | dpaax_iova_table_p->entries
|
|
* | |
|
|
* | |
|
|
* V V
|
|
* +------+------+-------+---+----------+---------+---
|
|
* |iova_ |entry | entry | | pages | pages |
|
|
* |table | 1 | 2 |...| entry 1 | entry2 |
|
|
* +-----'+.-----+-------+---+;---------+;--------+---
|
|
* \ \ / /
|
|
* `~~~~~~|~~~~~>pages /
|
|
* \ /
|
|
* `~~~~~~~~~~~>pages
|
|
*/
|
|
entry[i].start = nodes[i].addr;
|
|
entry[i].len = nodes[i].len;
|
|
if (i > 0)
|
|
entry[i].pages = entry[i-1].pages +
|
|
((entry[i-1].len/DPAAX_MEM_SPLIT));
|
|
else
|
|
entry[i].pages = (uint64_t *)((unsigned char *)entry +
|
|
(sizeof(struct dpaax_iovat_element) *
|
|
node_count));
|
|
|
|
DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
|
|
i, entry[i].start, entry[i].len, entry[i].pages);
|
|
}
|
|
|
|
/* Release memory associated with nodes array - not required now */
|
|
free(nodes);
|
|
|
|
DPAAX_DEBUG("Adding mem-event handler\n");
|
|
ret = dpaax_handle_memevents();
|
|
if (ret) {
|
|
DPAAX_ERR("Unable to add mem-event handler");
|
|
DPAAX_WARN("Cases with non-buffer pool mem won't work!");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
dpaax_iova_table_depopulate(void)
|
|
{
|
|
if (dpaax_iova_table_p == NULL)
|
|
return;
|
|
|
|
rte_free(dpaax_iova_table_p->entries);
|
|
dpaax_iova_table_p = NULL;
|
|
|
|
DPAAX_DEBUG("IOVA Table cleanedup");
|
|
}
|
|
|
|
int
|
|
dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
|
|
{
|
|
int found = 0;
|
|
unsigned int i;
|
|
size_t req_length = length, e_offset;
|
|
struct dpaax_iovat_element *entry;
|
|
uintptr_t align_vaddr;
|
|
phys_addr_t align_paddr;
|
|
|
|
if (unlikely(dpaax_iova_table_p == NULL))
|
|
return -1;
|
|
|
|
align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
|
|
align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
|
|
|
|
/* Check if paddr is available in table */
|
|
entry = dpaax_iova_table_p->entries;
|
|
for (i = 0; i < dpaax_iova_table_p->count; i++) {
|
|
if (align_paddr < entry[i].start) {
|
|
/* Address lower than start, but not found in previous
|
|
* iteration shouldn't exist.
|
|
*/
|
|
DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
|
|
"(%"PRIu64")", paddr);
|
|
DPAAX_ERR("Add: Lowest address: %"PRIu64"",
|
|
entry[i].start);
|
|
return -1;
|
|
}
|
|
|
|
if (align_paddr > (entry[i].start + entry[i].len))
|
|
continue;
|
|
|
|
/* align_paddr >= start && align_paddr < (start + len) */
|
|
found = 1;
|
|
|
|
do {
|
|
e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
|
|
/* TODO: Whatif something already exists at this
|
|
* location - is that an error? For now, ignoring the
|
|
* case.
|
|
*/
|
|
entry[i].pages[e_offset] = align_vaddr;
|
|
DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
|
|
" remaining len %zu", align_vaddr,
|
|
align_paddr, e_offset, req_length);
|
|
|
|
/* Incoming request can be larger than the
|
|
* DPAAX_MEM_SPLIT size - in which case, multiple
|
|
* entries in entry->pages[] are filled up.
|
|
*/
|
|
if (req_length <= DPAAX_MEM_SPLIT)
|
|
break;
|
|
align_paddr += DPAAX_MEM_SPLIT;
|
|
align_vaddr += DPAAX_MEM_SPLIT;
|
|
req_length -= DPAAX_MEM_SPLIT;
|
|
} while (1);
|
|
|
|
break;
|
|
}
|
|
|
|
if (!found) {
|
|
/* There might be case where the incoming physical address is
|
|
* beyond the address discovered in the memory node of
|
|
* device-tree. Specially if some malloc'd area is used by EAL
|
|
* and the memevent handlers passes that across. But, this is
|
|
* not necessarily an error.
|
|
*/
|
|
DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
|
|
" phy(%"PRIu64")",
|
|
vaddr, paddr);
|
|
return -1;
|
|
}
|
|
|
|
DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
|
|
" phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
|
|
vaddr, paddr, length);
|
|
return 0;
|
|
}
|
|
|
|
/* dpaax_iova_table_dump
|
|
* Dump the table, with its entries, on screen. Only works in Debug Mode
|
|
* Not for weak hearted - the tables can get quite large
|
|
*/
|
|
void
|
|
dpaax_iova_table_dump(void)
|
|
{
|
|
unsigned int i, j;
|
|
struct dpaax_iovat_element *entry;
|
|
|
|
/* In case DEBUG is not enabled, some 'if' conditions might misbehave
|
|
* as they have nothing else in them except a DPAAX_DEBUG() which if
|
|
* tuned out would leave 'if' naked.
|
|
*/
|
|
if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
|
|
DPAAX_ERR("Set log level to Debug for PA->Table dump!");
|
|
return;
|
|
}
|
|
|
|
DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
|
|
if (dpaax_iova_table_p == NULL)
|
|
DPAAX_DEBUG("\tNULL");
|
|
|
|
entry = dpaax_iova_table_p->entries;
|
|
for (i = 0; i < dpaax_iova_table_p->count; i++) {
|
|
DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
|
|
i, entry[i].start, entry[i].len, entry[i].pages);
|
|
DPAAX_DEBUG("\t\t (PA), (VA)");
|
|
for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) {
|
|
if (entry[i].pages[j] == 0)
|
|
continue;
|
|
DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
|
|
(entry[i].start + (j * sizeof(uint64_t))),
|
|
entry[i].pages[j]);
|
|
}
|
|
}
|
|
DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
|
|
}
|
|
|
|
static void
|
|
dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
|
|
void *arg __rte_unused)
|
|
{
|
|
struct rte_memseg_list *msl;
|
|
struct rte_memseg *ms;
|
|
size_t cur_len = 0, map_len = 0;
|
|
phys_addr_t phys_addr;
|
|
void *virt_addr;
|
|
int ret;
|
|
|
|
DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
|
|
|
|
msl = rte_mem_virt2memseg_list(addr);
|
|
|
|
while (cur_len < len) {
|
|
const void *va = RTE_PTR_ADD(addr, cur_len);
|
|
|
|
ms = rte_mem_virt2memseg(va, msl);
|
|
phys_addr = rte_mem_virt2phy(ms->addr);
|
|
virt_addr = ms->addr;
|
|
map_len = ms->len;
|
|
|
|
DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
|
|
"iova=%"PRIu64", map_len=%zu",
|
|
type == RTE_MEM_EVENT_ALLOC ?
|
|
"alloc" : "dealloc",
|
|
va, virt_addr, phys_addr, map_len);
|
|
|
|
if (type == RTE_MEM_EVENT_ALLOC)
|
|
ret = dpaax_iova_table_update(phys_addr, virt_addr,
|
|
map_len);
|
|
else
|
|
/* In case of mem_events for MEM_EVENT_FREE, complete
|
|
* hugepage is released and its PA entry is set to 0.
|
|
*/
|
|
ret = dpaax_iova_table_update(phys_addr, 0, map_len);
|
|
|
|
if (ret != 0) {
|
|
DPAAX_DEBUG("PA-Table entry update failed. "
|
|
"Map=%d, addr=%p, len=%zu, err:(%d)",
|
|
type, va, map_len, ret);
|
|
return;
|
|
}
|
|
|
|
cur_len += map_len;
|
|
}
|
|
}
|
|
|
|
static int
|
|
dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
|
|
const struct rte_memseg *ms, size_t len,
|
|
void *arg __rte_unused)
|
|
{
|
|
DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
|
|
ms->addr, ms->phys_addr, len);
|
|
dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
dpaax_handle_memevents(void)
|
|
{
|
|
/* First, walk through all memsegs and pin them, before installing
|
|
* handler. This assures that all memseg which have already been
|
|
* identified/allocated by EAL, are already part of PA->VA Table. This
|
|
* is especially for cases where application allocates memory before
|
|
* the EAL or this is an externally allocated memory passed to EAL.
|
|
*/
|
|
rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
|
|
|
|
return rte_mem_event_callback_register("dpaax_memevents_cb",
|
|
dpaax_memevent_cb, NULL);
|
|
}
|
|
|
|
RTE_INIT(dpaax_log)
|
|
{
|
|
dpaax_logger = rte_log_register("pmd.common.dpaax");
|
|
if (dpaax_logger >= 0)
|
|
rte_log_set_level(dpaax_logger, RTE_LOG_ERR);
|
|
}
|