99aeb28b2f
The GVT-d emulation tries to allocate some specific memory. It could happen that this address doesn't exist. In that case, GVT-d will fall back to allocate any address. Nevertheless, this only works if the e820 fails with an error instead of exiting on an assertion. Reviewed by: markj MFC after: 1 week Sponsored by: Beckhoff Automation GmbH & Co. KG Differential Revision: https://reviews.freebsd.org/D40034
469 lines
11 KiB
C
469 lines
11 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*
|
|
* Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
|
|
* Author: Corvin Köhne <c.koehne@beckhoff.com>
|
|
*/
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/queue.h>
|
|
|
|
#include <machine/vmm.h>
|
|
|
|
#include <assert.h>
|
|
#include <err.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "e820.h"
|
|
#include "qemu_fwcfg.h"
|
|
|
|
/*
|
|
* E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
|
|
* works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
|
|
* hold all possible physical addresses and we can get into trouble.
|
|
*/
|
|
static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
|
|
"Unable to represent physical memory by E820 table");
|
|
|
|
#define E820_FWCFG_FILE_NAME "etc/e820"
|
|
|
|
#define KB (1024UL)
|
|
#define MB (1024 * KB)
|
|
#define GB (1024 * MB)
|
|
|
|
/*
|
|
* Fix E820 memory holes:
|
|
* [ A0000, C0000) VGA
|
|
* [ C0000, 100000) ROM
|
|
*/
|
|
#define E820_VGA_MEM_BASE 0xA0000
|
|
#define E820_VGA_MEM_END 0xC0000
|
|
#define E820_ROM_MEM_BASE 0xC0000
|
|
#define E820_ROM_MEM_END 0x100000
|
|
|
|
struct e820_element {
|
|
TAILQ_ENTRY(e820_element) chain;
|
|
uint64_t base;
|
|
uint64_t end;
|
|
enum e820_memory_type type;
|
|
};
|
|
static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
|
|
e820_table);
|
|
|
|
static struct e820_element *
|
|
e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
|
|
{
|
|
struct e820_element *element;
|
|
|
|
element = calloc(1, sizeof(*element));
|
|
if (element == NULL) {
|
|
return (NULL);
|
|
}
|
|
|
|
element->base = base;
|
|
element->end = end;
|
|
element->type = type;
|
|
|
|
return (element);
|
|
}
|
|
|
|
static const char *
|
|
e820_get_type_name(const enum e820_memory_type type)
|
|
{
|
|
switch (type) {
|
|
case E820_TYPE_MEMORY:
|
|
return ("RAM");
|
|
case E820_TYPE_RESERVED:
|
|
return ("Reserved");
|
|
case E820_TYPE_ACPI:
|
|
return ("ACPI");
|
|
case E820_TYPE_NVS:
|
|
return ("NVS");
|
|
default:
|
|
return ("Unknown");
|
|
}
|
|
}
|
|
|
|
void
|
|
e820_dump_table(void)
|
|
{
|
|
struct e820_element *element;
|
|
uint64_t i;
|
|
|
|
fprintf(stderr, "E820 map:\n");
|
|
|
|
i = 0;
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
fprintf(stderr, " (%4lu) [%16lx, %16lx] %s\n", i,
|
|
element->base, element->end,
|
|
e820_get_type_name(element->type));
|
|
|
|
++i;
|
|
}
|
|
}
|
|
|
|
struct qemu_fwcfg_item *
|
|
e820_get_fwcfg_item(void)
|
|
{
|
|
struct qemu_fwcfg_item *fwcfg_item;
|
|
struct e820_element *element;
|
|
struct e820_entry *entries;
|
|
int count, i;
|
|
|
|
count = 0;
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
++count;
|
|
}
|
|
if (count == 0) {
|
|
warnx("%s: E820 table empty", __func__);
|
|
return (NULL);
|
|
}
|
|
|
|
fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
|
|
if (fwcfg_item == NULL) {
|
|
return (NULL);
|
|
}
|
|
|
|
fwcfg_item->size = count * sizeof(struct e820_entry);
|
|
fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
|
|
if (fwcfg_item->data == NULL) {
|
|
free(fwcfg_item);
|
|
return (NULL);
|
|
}
|
|
|
|
i = 0;
|
|
entries = (struct e820_entry *)fwcfg_item->data;
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
struct e820_entry *entry = &entries[i];
|
|
|
|
entry->base = element->base;
|
|
entry->length = element->end - element->base;
|
|
entry->type = element->type;
|
|
|
|
++i;
|
|
}
|
|
|
|
return (fwcfg_item);
|
|
}
|
|
|
|
static int
|
|
e820_add_entry(const uint64_t base, const uint64_t end,
|
|
const enum e820_memory_type type)
|
|
{
|
|
struct e820_element *new_element;
|
|
struct e820_element *element;
|
|
struct e820_element *ram_element;
|
|
|
|
assert(end >= base);
|
|
|
|
new_element = e820_element_alloc(base, end, type);
|
|
if (new_element == NULL) {
|
|
return (ENOMEM);
|
|
}
|
|
|
|
/*
|
|
* E820 table should always be sorted in ascending order. Therefore,
|
|
* search for a range whose end is larger than the base parameter.
|
|
*/
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
if (element->end > base) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* System memory requires special handling.
|
|
*/
|
|
if (type == E820_TYPE_MEMORY) {
|
|
/*
|
|
* base is larger than of any existing element. Add new system
|
|
* memory at the end of the table.
|
|
*/
|
|
if (element == NULL) {
|
|
TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* System memory shouldn't overlap with any existing element.
|
|
*/
|
|
assert(end >= element->base);
|
|
|
|
TAILQ_INSERT_BEFORE(element, new_element, chain);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* If some one tries to allocate a specific address, it could happen, that
|
|
* this address is not allocatable. Therefore, do some checks. If the
|
|
* address is not allocatable, don't panic. The user may have a fallback and
|
|
* tries to allocate another address. This is true for the GVT-d emulation
|
|
* which tries to reuse the host address of the graphics stolen memory and
|
|
* falls back to allocating the highest address below 4 GB.
|
|
*/
|
|
if (element == NULL || element->type != E820_TYPE_MEMORY ||
|
|
(base < element->base || end > element->end))
|
|
return (ENOMEM);
|
|
|
|
if (base == element->base) {
|
|
/*
|
|
* New element at system memory base boundary. Add new
|
|
* element before current and adjust the base of the old
|
|
* element.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM <-- element
|
|
* New table:
|
|
* [ 0x1000, 0x2000] Reserved
|
|
* [ 0x2000, 0x4000] RAM <-- element
|
|
*/
|
|
TAILQ_INSERT_BEFORE(element, new_element, chain);
|
|
element->base = end;
|
|
} else if (end == element->end) {
|
|
/*
|
|
* New element at system memory end boundary. Add new
|
|
* element after current and adjust the end of the
|
|
* current element.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM <-- element
|
|
* New table:
|
|
* [ 0x1000, 0x3000] RAM <-- element
|
|
* [ 0x3000, 0x4000] Reserved
|
|
*/
|
|
TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
|
|
element->end = base;
|
|
} else {
|
|
/*
|
|
* New element inside system memory entry. Split it by
|
|
* adding a system memory element and the new element
|
|
* before current.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM <-- element
|
|
* New table:
|
|
* [ 0x1000, 0x2000] RAM
|
|
* [ 0x2000, 0x3000] Reserved
|
|
* [ 0x3000, 0x4000] RAM <-- element
|
|
*/
|
|
ram_element = e820_element_alloc(element->base, base,
|
|
E820_TYPE_MEMORY);
|
|
if (ram_element == NULL) {
|
|
return (ENOMEM);
|
|
}
|
|
TAILQ_INSERT_BEFORE(element, ram_element, chain);
|
|
TAILQ_INSERT_BEFORE(element, new_element, chain);
|
|
element->base = end;
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
e820_add_memory_hole(const uint64_t base, const uint64_t end)
|
|
{
|
|
struct e820_element *element;
|
|
struct e820_element *ram_element;
|
|
|
|
assert(end >= base);
|
|
|
|
/*
|
|
* E820 table should be always sorted in ascending order. Therefore,
|
|
* search for an element which end is larger than the base parameter.
|
|
*/
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
if (element->end > base) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (element == NULL || end <= element->base) {
|
|
/* Nothing to do. Hole already exists */
|
|
return (0);
|
|
}
|
|
|
|
/* Memory holes are only allowed in system memory */
|
|
assert(element->type == E820_TYPE_MEMORY);
|
|
|
|
if (base == element->base) {
|
|
/*
|
|
* New hole at system memory base boundary.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM
|
|
* New table:
|
|
* [ 0x2000, 0x4000] RAM
|
|
*/
|
|
element->base = end;
|
|
} else if (end == element->end) {
|
|
/*
|
|
* New hole at system memory end boundary.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM
|
|
* New table:
|
|
* [ 0x1000, 0x3000] RAM
|
|
*/
|
|
element->end = base;
|
|
} else {
|
|
/*
|
|
* New hole inside system memory entry. Split the system memory.
|
|
*
|
|
* Old table:
|
|
* [ 0x1000, 0x4000] RAM <-- element
|
|
* New table:
|
|
* [ 0x1000, 0x2000] RAM
|
|
* [ 0x3000, 0x4000] RAM <-- element
|
|
*/
|
|
ram_element = e820_element_alloc(element->base, base,
|
|
E820_TYPE_MEMORY);
|
|
if (ram_element == NULL) {
|
|
return (ENOMEM);
|
|
}
|
|
TAILQ_INSERT_BEFORE(element, ram_element, chain);
|
|
element->base = end;
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static uint64_t
|
|
e820_alloc_highest(const uint64_t max_address, const uint64_t length,
|
|
const uint64_t alignment, const enum e820_memory_type type)
|
|
{
|
|
struct e820_element *element;
|
|
|
|
TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
|
|
uint64_t address, base, end;
|
|
|
|
end = MIN(max_address, element->end);
|
|
base = roundup2(element->base, alignment);
|
|
|
|
/*
|
|
* If end - length == 0, we would allocate memory at address 0. This
|
|
* address is mostly unusable and we should avoid allocating it.
|
|
* Therefore, search for another block in that case.
|
|
*/
|
|
if (element->type != E820_TYPE_MEMORY || end < base ||
|
|
end - base < length || end - length == 0) {
|
|
continue;
|
|
}
|
|
|
|
address = rounddown2(end - length, alignment);
|
|
|
|
if (e820_add_entry(address, address + length, type) != 0) {
|
|
return (0);
|
|
}
|
|
|
|
return (address);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static uint64_t
|
|
e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
|
|
const uint64_t alignment, const enum e820_memory_type type)
|
|
{
|
|
struct e820_element *element;
|
|
|
|
TAILQ_FOREACH(element, &e820_table, chain) {
|
|
uint64_t base, end;
|
|
|
|
end = element->end;
|
|
base = MAX(min_address, roundup2(element->base, alignment));
|
|
|
|
/*
|
|
* If base == 0, we would allocate memory at address 0. This
|
|
* address is mostly unusable and we should avoid allocating it.
|
|
* Therefore, search for another block in that case.
|
|
*/
|
|
if (element->type != E820_TYPE_MEMORY || end < base ||
|
|
end - base < length || base == 0) {
|
|
continue;
|
|
}
|
|
|
|
if (e820_add_entry(base, base + length, type) != 0) {
|
|
return (0);
|
|
}
|
|
|
|
return (base);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
uint64_t
|
|
e820_alloc(const uint64_t address, const uint64_t length,
|
|
const uint64_t alignment, const enum e820_memory_type type,
|
|
const enum e820_allocation_strategy strategy)
|
|
{
|
|
assert(powerof2(alignment));
|
|
assert((address & (alignment - 1)) == 0);
|
|
|
|
switch (strategy) {
|
|
case E820_ALLOCATE_ANY:
|
|
/*
|
|
* Allocate any address. Therefore, ignore the address parameter
|
|
* and reuse the code path for allocating the lowest address.
|
|
*/
|
|
return (e820_alloc_lowest(0, length, alignment, type));
|
|
case E820_ALLOCATE_LOWEST:
|
|
return (e820_alloc_lowest(address, length, alignment, type));
|
|
case E820_ALLOCATE_HIGHEST:
|
|
return (e820_alloc_highest(address, length, alignment, type));
|
|
case E820_ALLOCATE_SPECIFIC:
|
|
if (e820_add_entry(address, address + length, type) != 0) {
|
|
return (0);
|
|
}
|
|
|
|
return (address);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
e820_init(struct vmctx *const ctx)
|
|
{
|
|
uint64_t lowmem_size, highmem_size;
|
|
int error;
|
|
|
|
TAILQ_INIT(&e820_table);
|
|
|
|
lowmem_size = vm_get_lowmem_size(ctx);
|
|
error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
|
|
if (error) {
|
|
warnx("%s: Could not add lowmem", __func__);
|
|
return (error);
|
|
}
|
|
|
|
highmem_size = vm_get_highmem_size(ctx);
|
|
if (highmem_size != 0) {
|
|
error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
|
|
E820_TYPE_MEMORY);
|
|
if (error) {
|
|
warnx("%s: Could not add highmem", __func__);
|
|
return (error);
|
|
}
|
|
}
|
|
|
|
error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
|
|
if (error) {
|
|
warnx("%s: Could not add VGA memory", __func__);
|
|
return (error);
|
|
}
|
|
|
|
error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
|
|
if (error) {
|
|
warnx("%s: Could not add ROM area", __func__);
|
|
return (error);
|
|
}
|
|
|
|
return (0);
|
|
}
|