199 lines
4.5 KiB
C
199 lines
4.5 KiB
C
#include <pthread.h>
|
|
#include <sys/types.h>
|
|
#include <sys/cpuset.h>
|
|
#include <sys/domainset.h>
|
|
#include <sys/thr.h>
|
|
#include <sys/mman.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <stdatomic.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
#include <nms.h>
|
|
|
|
/* First dimension of nms_desc.regions / length of nms_desc.region_sz. */
#define MAX_NUMA_DOMAINS (64)
/* Second dimension of nms_desc.regions: region slots available per domain. */
#define MAX_REGIONS (64)
/* Bytes mapped for each region (1 GiB); also the largest single request. */
#define REGION_SIZE (1 << 30)
/* Granularity to which a region's allocation cursor is rounded up. */
#define PAGE_SIZE (4096)
|
|
|
|
/*
 * One contiguous mapping that is carved up front-to-back by
 * nms_region_malloc().
 */
struct nms_region {
	uintptr_t start_addr;	/* address of the first byte of the mapping */
	size_t size;		/* total number of bytes in the mapping */
	size_t occupied;	/* offset of the next free byte (allocation cursor) */
};
|
|
|
|
struct nms_desc {
|
|
// alloc
|
|
pthread_mutex_t alloc_lock;
|
|
|
|
struct nms_region regions[MAX_NUMA_DOMAINS][MAX_REGIONS];
|
|
int region_sz[MAX_NUMA_DOMAINS];
|
|
};
|
|
|
|
static _Atomic(int) initialized = 0;
|
|
static struct nms_desc g_desc;
|
|
|
|
/*
 * Map `sz` bytes of anonymous memory while the calling thread's domainset
 * policy is temporarily restricted to NUMA domain `node_id`.
 *
 * Steps: save the thread's current domain mask/policy, switch to a mask that
 * contains only `node_id`, mmap the range, write zero to every byte so each
 * page is faulted in under the temporary policy, then restore the saved
 * policy.
 *
 * The saved policy is restored on every path after it has been changed,
 * including when mmap fails.
 *
 * Returns the mapping on success. Returns NULL (after printing a diagnostic
 * to stderr) if `node_id` is out of range for a domainset_t, if any
 * cpuset/mmap call fails, or if the policy cannot be restored; in the last
 * case the fresh mapping is unmapped before returning.
 */
void *
nms_alloc_static(int node_id, size_t sz)
{
	long tid;
	domainset_t orig_dom;
	domainset_t target_dom;
	int orig_policy;
	int ret;
	void *region;

	/* DOMAINSET_SET indexes a fixed-size bitset; reject ids outside it. */
	if (node_id < 0 || node_id >= DOMAINSET_SETSIZE) {
		fprintf(stderr, "libnms: invalid node id %d\n", node_id);
		return NULL;
	}

	thr_self(&tid);
	DOMAINSET_ZERO(&orig_dom);

	// save existing thread's allocation strategy
	ret = cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig_dom), &orig_dom, &orig_policy);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_getdomain failed with %d\n", errno);
		return NULL;
	}

	DOMAINSET_ZERO(&target_dom);
	DOMAINSET_SET(node_id, &target_dom);

	ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(target_dom), &target_dom, DOMAINSET_POLICY_ROUNDROBIN);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
		return NULL;
	}

	region = mmap(NULL, sz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE |
	    MAP_NOSYNC | MAP_PREFAULT_READ, -1, 0);
	if (region == MAP_FAILED) {
		fprintf(stderr, "libnms: mmap failed with %d\n", errno);
		/* fall through so the thread's policy is still restored */
		region = NULL;
	} else {
		// touch the pages to prefault the pages
		memset(region, 0, sz);
	}

	// restore existing thread's allocation strategy
	ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig_dom), &orig_dom, orig_policy);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
		if (region != NULL) {
			/* release exactly what was mapped above */
			munmap(region, sz);
		}
		return NULL;
	}

	return region;
}
|
|
|
|
static int
|
|
nms_desc_init(struct nms_desc * desc, int verbose)
|
|
{
|
|
memset(desc, 0, sizeof(struct nms_desc));
|
|
pthread_mutex_init(&desc->alloc_lock, NULL);
|
|
return 0;
|
|
}
|
|
|
|
static void *
|
|
nms_region_malloc(struct nms_region * region, size_t size)
|
|
{
|
|
void * ret = NULL;
|
|
if (region->size >= region->occupied + size) {
|
|
ret = (void *)(region->start_addr + region->occupied);
|
|
region->occupied += size;
|
|
region->occupied = (region->occupied + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nms_desc_add_region(struct nms_desc * desc, int nodeid, size_t size)
|
|
{
|
|
void * ret;
|
|
int idx;
|
|
|
|
ret = nms_alloc_static(nodeid, REGION_SIZE);
|
|
if (ret == NULL) {
|
|
fprintf(stderr, "libnms: failed to allocate region on node %d\n", nodeid);
|
|
return ENOMEM;
|
|
}
|
|
|
|
desc->region_sz[nodeid]++;
|
|
idx = desc->region_sz[nodeid] - 1;
|
|
desc->regions[nodeid][idx].start_addr = (uintptr_t)ret;
|
|
desc->regions[nodeid][idx].occupied = 0;
|
|
desc->regions[nodeid][idx].size = REGION_SIZE;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void *
|
|
nms_desc_malloc(struct nms_desc * desc, unsigned int nodeid, size_t size)
|
|
{
|
|
void * ret = NULL;
|
|
int idx;
|
|
int new_region = 0;
|
|
|
|
if (size > REGION_SIZE) {
|
|
return NULL;
|
|
}
|
|
|
|
pthread_mutex_lock(&desc->alloc_lock);
|
|
|
|
retry:
|
|
if (desc->region_sz[nodeid] > 0) {
|
|
idx = desc->region_sz[nodeid] - 1;
|
|
ret = nms_region_malloc(&desc->regions[nodeid][idx], size);
|
|
}
|
|
|
|
if (ret == NULL) {
|
|
// we need a new region
|
|
if (nms_desc_add_region(desc, nodeid, REGION_SIZE) != 0) {
|
|
pthread_mutex_unlock(&desc->alloc_lock);
|
|
return NULL;
|
|
}
|
|
fprintf(stdout, "libnms: malloc request of size %zu -> allocated new region on node %d\n", size, nodeid);
|
|
goto retry;
|
|
}
|
|
|
|
pthread_mutex_unlock(&desc->alloc_lock);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Placeholder release hook: it performs no work and none of its arguments
 * are read.
 */
static void
nms_desc_free(struct nms_desc *desc, unsigned int node, void *addr)
{
	// dummy function
	(void)desc;
	(void)node;
	(void)addr;
}
|
|
|
|
int
|
|
nms_init(int verbose)
|
|
{
|
|
int expected = 0;
|
|
if (atomic_compare_exchange_strong(&initialized, &expected, 2)) {
|
|
nms_desc_init(&g_desc, verbose);
|
|
atomic_store(&initialized, 1);
|
|
} else {
|
|
while(atomic_load(&initialized) != 1) {
|
|
}
|
|
fprintf(stdout,"libnms: already initialized.\n");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void *
|
|
nms_malloc(int nodeid, size_t sz)
|
|
{
|
|
assert(atomic_load(&initialized) == 1);
|
|
return nms_desc_malloc(&g_desc, nodeid, sz);
|
|
}
|
|
|
|
void
|
|
nms_free(int nodeid, void * addr)
|
|
{
|
|
assert(atomic_load(&initialized) == 1);
|
|
nms_desc_free(&g_desc, nodeid, addr);
|
|
}
|
|
|