This commit is contained in:
quackerd 2022-05-27 04:18:49 +08:00
parent b0cde80b54
commit 41586f9ad0
6 changed files with 178 additions and 112 deletions

65
alloc.c
View File

@ -8,27 +8,17 @@
#include <errno.h>
#include "topo.h"
static pthread_mutex_t alloc_lock;
#define NM_MAX_OBJS_PER_LVL (256)
#define MEM_OBJ_SIZE (4096) // 4k
#define MEM_OBJ_NUM (1024 * 256) // 4k * 1024 * 256 = 1GB per region
#define MEM_REGION_NUM (4) // 4 x 1GB = 4GB total
static int nm_mem_idx[NM_MAX_OBJS_PER_LVL];
static int nm_mem_region_idx[NM_MAX_OBJS_PER_LVL];
static void* nm_mem_regions[NM_MAX_OBJS_PER_LVL][MEM_REGION_NUM];
struct topo_obj;
#include "topop.h"
int
topo_alloc_init(int verbose, struct topo_obj * tobj)
topo_alloc_init(struct topo_desc * desc, int verbose)
{
long tid;
thr_self(&tid);
domainset_t orig_dom;
int orig_policy;
pthread_mutex_init(&alloc_lock, NULL);
pthread_mutex_init(&desc->alloc_lock, NULL);
DOMAINSET_ZERO(&orig_dom);
@ -39,40 +29,37 @@ topo_alloc_init(int verbose, struct topo_obj * tobj)
}
domainset_t tmp_domain;
for (int i = 0; i < topo_num_numa(tobj); i++) {
for (int i = 0; i < desc->num_numa; i++) {
DOMAINSET_ZERO(&tmp_domain);
DOMAINSET_SET(i, &tmp_domain);
ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(tmp_domain), &tmp_domain, DOMAINSET_POLICY_PREFER);
if (ret != 0) {
if (verbose) {
fprintf(stderr, "libnm: cpuset_setdomain failed with %d\n", errno);
}
return ret;
}
for (unsigned int j = 0; j < MEM_REGION_NUM; j++) {
if ((nm_mem_regions[i][j] = mmap(NULL, MEM_OBJ_NUM * MEM_OBJ_SIZE, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE | MAP_NOSYNC,
-1, 0)) == MAP_FAILED) {
if (verbose) {
for (unsigned int j = 0; j < ALLOC_MEM_REGION_NUM; j++) {
if ((desc->mem_regions[i][j] = mmap(NULL, ALLOC_MEM_OBJ_NUM * ALLOC_MEM_OBJ_SIZE, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE | MAP_NOSYNC, -1, 0)) == MAP_FAILED) {
fprintf(stderr, "libnm: mmap failed with %d\n", errno);
}
return -1;
}
// touch the pages to prefault the pages
for (unsigned int k = 0; k < MEM_OBJ_NUM; k++) {
*(uint32_t*)((char*)nm_mem_regions[i][j] + k * MEM_OBJ_SIZE) = 0;
for (unsigned int k = 0; k < ALLOC_MEM_OBJ_NUM; k++) {
*(uint32_t*)((char*)desc->mem_regions[i][j] + k * ALLOC_MEM_OBJ_SIZE) = 0;
}
if (verbose) {
fprintf(stdout, "libnm: reserved %u bytes (%u MB) on node %d. vaddr: 0x%p\n", MEM_OBJ_NUM * MEM_OBJ_SIZE, MEM_OBJ_SIZE * MEM_OBJ_NUM / 1024 / 1024, i, nm_mem_regions[i][j]);
fprintf(stdout, "libnm: reserved %u bytes (%u MB) on node %d. vaddr: 0x%p\n", ALLOC_MEM_OBJ_NUM * ALLOC_MEM_OBJ_SIZE,
ALLOC_MEM_OBJ_SIZE * ALLOC_MEM_OBJ_NUM / 1024 / 1024,
i, desc->mem_regions[i][j]);
}
}
nm_mem_idx[i] = 0;
nm_mem_region_idx[i] = 0;
desc->mem_idx[i] = 0;
desc->mem_region_idx[i] = 0;
}
// restore existing thread's allocation strategy
@ -82,35 +69,35 @@ topo_alloc_init(int verbose, struct topo_obj * tobj)
}
/*
 * Hand out `size` bytes from the pre-mapped regions of NUMA domain `node`.
 *
 * The request is rounded up to a whole number of ALLOC_MEM_OBJ_SIZE objects.
 * The per-node cursor (current region + object index inside that region) is
 * read and advanced while holding the allocator mutex.  If the request does
 * not fit in the remainder of the current region, the next region is tried
 * exactly once, starting at object index 0.
 *
 * Returns the address of the reserved span, or NULL when neither attempt fits.
 *
 * NOTE(review): this span shows both the pre-change lines (global nm_* state,
 * MEM_* macros, old topo_malloc signature) and their post-change replacements
 * (desc-> state, ALLOC_MEM_* macros).
 * NOTE(review): `node` is used as an array index without a range check --
 * confirm callers only pass valid domain ids.
 */
void *
topo_malloc(unsigned int node, size_t size)
topo_desc_malloc(struct topo_desc * desc, unsigned int node, size_t size)
{
void * ret = NULL;
// number of fixed-size objects needed to cover `size` (ceiling division)
int num_objs = (size + MEM_OBJ_SIZE - 1) / MEM_OBJ_SIZE;
int num_objs = (size + ALLOC_MEM_OBJ_SIZE - 1) / ALLOC_MEM_OBJ_SIZE;
// set once the next region has been tried, so we advance at most one region
int retry = 0;
pthread_mutex_lock(&alloc_lock);
int cur_region = nm_mem_region_idx[node];
int cur_idx = nm_mem_idx[node];
pthread_mutex_lock(&desc->alloc_lock);
int cur_region = desc->mem_region_idx[node];
int cur_idx = desc->mem_idx[node];
retry:
// enough objects left in the current region: carve the span and advance the cursor
if ((int)MEM_OBJ_NUM - cur_idx >= num_objs) {
ret = (char*)nm_mem_regions[node][cur_region] + MEM_OBJ_SIZE * cur_idx;
nm_mem_region_idx[node] = cur_region;
nm_mem_idx[node] = cur_idx + num_objs;
} else if (!retry && (cur_region < (int)MEM_REGION_NUM)) {
if ((int)ALLOC_MEM_OBJ_NUM - cur_idx >= num_objs) {
ret = (char*)desc->mem_regions[node][cur_region] + ALLOC_MEM_OBJ_SIZE * cur_idx;
desc->mem_region_idx[node] = cur_region;
desc->mem_idx[node] = cur_idx + num_objs;
// NOTE(review): when cur_region is already the last region, this condition still
// holds and the increment below yields an index equal to the region count, which
// looks like a read past the end of the regions array -- confirm whether the
// bound should be (region count - 1).
} else if (!retry && (cur_region < (int)ALLOC_MEM_REGION_NUM)) {
// check next region
cur_region++;
cur_idx = 0;
retry = 1;
goto retry;
}
pthread_mutex_unlock(&alloc_lock);
pthread_mutex_unlock(&desc->alloc_lock);
return ret;
}
/*
 * Counterpart of the allocation routine.  The body is empty and every
 * parameter is marked unused, so calling it has no effect: memory handed out
 * by the allocator is never returned to the regions.
 *
 * NOTE(review): both the old signature (topo_free) and the new one
 * (topo_desc_free, which additionally takes the descriptor) are shown here.
 */
void
topo_free(unsigned int node __attribute__((unused)), void * addr __attribute__((unused)))
topo_desc_free(struct topo_desc * desc __attribute__((unused)), unsigned int node __attribute__((unused)), void * addr __attribute__((unused)))
{
// dummy function
}

View File

@ -7,6 +7,8 @@
extern "C" {
#endif
struct topo_desc;
int
topo_num_core();
@ -20,7 +22,7 @@ void
topo_destroy();
int
topo_init(int verbose, int alloc_init);
topo_init(int enable_alloc, int verbose);
void *
topo_malloc(unsigned int node, size_t size);

View File

@ -96,7 +96,7 @@ int ts_test()
int main()
{
topo_init(1, 1);
topo_init(1, 0);
topo_init(0, 1);
ts_test();
return 0;
}

View File

@ -5,30 +5,30 @@
#include <sys/sysctl.h>
#include "topo.h"
static uint64_t sysctl_tsc_freq = 0;
#include "topop.h"
#define S2NS (1000000000UL)
/*
 * Convert a TSC tick count to nanoseconds: tsc / frequency * S2NS, computed
 * in double precision and truncated back to uint64_t.
 *
 * The old variant reads the frequency from the file-level sysctl_tsc_freq;
 * the new variant receives it as the tsc_freq parameter.
 *
 * NOTE(review): a frequency of 0 (e.g. if the sysctl query failed) makes the
 * division produce a non-finite double before the integer cast -- confirm
 * callers never get here with an unset frequency.
 */
static uint64_t
tsc2ns(uint64_t tsc)
tsc2ns(uint64_t tsc, uint64_t tsc_freq)
{
return (uint64_t)(
(double)tsc / (double)sysctl_tsc_freq * S2NS);
(double)tsc / (double)tsc_freq * S2NS);
}
int
topo_ts_init(int verbose)
topo_ts_init(struct topo_desc * desc, int verbose)
{
int rc;
size_t sz = sizeof(sysctl_tsc_freq);
size_t sz = sizeof(desc->tsc_freq);
// init nm_tsc2ns
if ((rc = sysctlbyname(
"machdep.tsc_freq", &sysctl_tsc_freq, &sz, NULL, 0)) < 0) {
if ((rc = sysctlbyname("machdep.tsc_freq", &desc->tsc_freq, &sz, NULL, 0)) < 0) {
fprintf(stderr,"libtopo: failed to query tsc frequency via sysctl (%d)\n", errno);
} else {
if (verbose) {
fprintf(stderr,
"libnm: failed to query tsc frequency via sysctl (%d)\n", errno);
fprintf(stdout,"libtopo: tsc frequency = %lu\n", desc->tsc_freq);
}
}
@ -36,11 +36,11 @@ topo_ts_init(int verbose)
}
/*
 * Read the time-stamp counter and return it converted to nanoseconds via
 * tsc2ns().  The new variant takes the TSC frequency from desc->tsc_freq.
 *
 * The __rdtscp() read is bracketed by _mm_lfence() calls; the auxiliary value
 * written by __rdtscp() into `dummy` is discarded.
 */
uint64_t
topo_uptime_ns()
topo_desc_uptime_ns(struct topo_desc * desc)
{
// receives the auxiliary output of __rdtscp(); never read afterwards
unsigned int dummy;
_mm_lfence();
uint64_t tsc = __rdtscp(&dummy);
_mm_lfence();
return tsc2ns(tsc);
return tsc2ns(tsc, desc->tsc_freq);
}

135
topo.c
View File

@ -17,9 +17,8 @@
#include "libxml/xmlversion.h"
#include "topo.h"
#include "topop.h"
#define TOPO_MAX_CHILDREN (256)
#define TOPO_INVALID (-1)
#define TOPO_CACHE_L2 (2)
@ -41,9 +40,9 @@ struct topo_obj {
struct topo_obj * children[TOPO_MAX_CHILDREN];
};
extern int topo_alloc_init(int verbose, struct topo_obj * tobj);
extern int topo_ts_init(int verbose);
static struct topo_obj * g_tobj = NULL;
static struct topo_desc g_default_desc = {0};
static _Atomic(int) initialized = 0;
static volatile int init_rc = 0;
static void
mask_to_cpuset(const char * mask, cpuset_t * cset)
@ -80,7 +79,7 @@ mask_to_cpuset(const char * mask, cpuset_t * cset)
}
static void
preorder_dump(struct topo_obj * root, int indent) {
tobj_preorder_dump(struct topo_obj * root, int indent) {
cpuset_t tmpset;
CPU_COPY(&root->mask, &tmpset);
const char * flag_str;
@ -110,12 +109,12 @@ preorder_dump(struct topo_obj * root, int indent) {
}
printf("\n");
for (int i = 0; i < root->num_children; i++) {
preorder_dump(root->children[i], indent + 1);
tobj_preorder_dump(root->children[i], indent + 1);
}
}
static xmlAttr *
find_attr(xmlNode * root, const char * attr_name)
xml_find_attr(xmlNode * root, const char * attr_name)
{
xmlAttr * attr = root->properties;
while (attr != NULL) {
@ -129,7 +128,7 @@ find_attr(xmlNode * root, const char * attr_name)
}
static void
free_root(struct topo_obj * root)
tobj_free_root(struct topo_obj * root)
{
if (root == NULL) {
return;
@ -137,7 +136,7 @@ free_root(struct topo_obj * root)
if (root->num_children > 0) {
for (int i = 0; i < root->num_children; i++) {
free_root(root->children[i]);
tobj_free_root(root->children[i]);
}
}
@ -145,21 +144,21 @@ free_root(struct topo_obj * root)
}
static int
populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
tobj_populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
{
int rc = 0;
if (root->type == XML_ELEMENT_NODE) {
if (strcmp((const char*)root->name, "group") == 0) {
// this is a group, then check cache-level
xmlAttr * attr = find_attr(root, "cache-level");
xmlAttr * attr = xml_find_attr(root, "cache-level");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr cache-level in group.\n");
return -1;
}
xmlAttr * lvl_attr = find_attr(root, "level");
xmlAttr * lvl_attr = xml_find_attr(root, "level");
if (lvl_attr == NULL) {
fprintf(stderr, "libtopo: could not find attr level in group.\n");
return -1;
@ -193,7 +192,7 @@ populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
parent = tobj;
} else if (strcmp((const char*)root->name, "cpu") == 0) {
xmlAttr * attr = find_attr(root, "mask");
xmlAttr * attr = xml_find_attr(root, "mask");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr mask in cpu.\n");
@ -202,7 +201,7 @@ populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
mask_to_cpuset((const char *)attr->children->content, &parent->mask);
} else if (strcmp((const char *)root->name, "flag") == 0) {
xmlAttr * attr = find_attr(root, "name");
xmlAttr * attr = xml_find_attr(root, "name");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr name in flag.\n");
@ -222,7 +221,7 @@ populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
xmlNode * child = root->children;
while (child != NULL) {
if ((rc = populate_root(child, parent, out)) != 0) {
if ((rc = tobj_populate_root(child, parent, out)) != 0) {
break;
}
child = child->next;
@ -232,7 +231,7 @@ populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
}
static int
init(struct topo_obj **out)
tobj_populate(struct topo_obj **out)
{
int rc;
size_t sz;
@ -262,12 +261,12 @@ init(struct topo_obj **out)
errno = EINVAL;
return -1;
}
rc = populate_root(xmlDocGetRootElement(doc), NULL, out) != 0;
rc = tobj_populate_root(xmlDocGetRootElement(doc), NULL, out) != 0;
free(buf);
xmlFreeDoc(doc);
if (rc != 0) {
free_root(*out);
tobj_free_root(*out);
errno = EINVAL;
return rc;
}
@ -276,7 +275,7 @@ init(struct topo_obj **out)
}
static struct topo_obj *
find_node_by_mask(struct topo_obj * root, cpuset_t * set)
tobj_find_node_by_mask(struct topo_obj * root, cpuset_t * set)
{
if (!CPU_SUBSET(&root->mask, set)) {
return NULL;
@ -284,7 +283,7 @@ find_node_by_mask(struct topo_obj * root, cpuset_t * set)
struct topo_obj * best_fit;
for (int i = 0; i < root->num_children; i++) {
best_fit = find_node_by_mask(root->children[i], set);
best_fit = tobj_find_node_by_mask(root->children[i], set);
if (best_fit != NULL) {
break;
}
@ -294,7 +293,7 @@ find_node_by_mask(struct topo_obj * root, cpuset_t * set)
}
static struct topo_obj *
find_node_by_flag(struct topo_obj * root, int flag)
tobj_find_node_by_flag(struct topo_obj * root, int flag)
{
if (root->flags & flag) {
return root;
@ -302,7 +301,7 @@ find_node_by_flag(struct topo_obj * root, int flag)
struct topo_obj * result;
for (int i = 0; i < root->num_children; i++) {
result = find_node_by_flag(root->children[i], flag);
result = tobj_find_node_by_flag(root->children[i], flag);
if (result != NULL) {
return result;
}
@ -312,13 +311,13 @@ find_node_by_flag(struct topo_obj * root, int flag)
}
static int
_topo_core_to_numa(struct topo_obj * root, int coreid)
tobj_core_to_numa(struct topo_obj * root, int coreid)
{
cpuset_t set;
CPU_ZERO(&set);
CPU_SET(coreid, &set);
struct topo_obj * obj = find_node_by_mask(root, &set);
struct topo_obj * obj = tobj_find_node_by_mask(root, &set);
if (obj == NULL) {
return TOPO_INVALID;
}
@ -341,9 +340,9 @@ _topo_core_to_numa(struct topo_obj * root, int coreid)
}
static int
_topo_num_numa(struct topo_obj * root)
tobj_get_num_numa(struct topo_obj * root)
{
struct topo_obj * result = find_node_by_flag(root, TOPO_FLAG_NUMA);
struct topo_obj * result = tobj_find_node_by_flag(root, TOPO_FLAG_NUMA);
if (result == NULL) {
return 1;
@ -357,78 +356,114 @@ _topo_num_numa(struct topo_obj * root)
}
/*
 * Number of cores covered by the topology tree: the count of CPUs set in the
 * root object's cpuset mask.  (Renamed from _topo_num_core.)
 */
static int
_topo_num_core(struct topo_obj * root)
tobj_get_num_core(struct topo_obj * root)
{
return CPU_COUNT(&root->mask);
}
/*
 * Tear down a descriptor by freeing its topology tree.
 *
 * NOTE(review): only the tree is released here; the allocator mutex and the
 * mmap'ed memory regions set up by topo_alloc_init() are not touched in this
 * function -- confirm whether that is intentional.
 */
static void
_topo_destroy(struct topo_obj * obj)
desc_destroy(struct topo_desc * desc)
{
free_root(obj);
tobj_free_root(desc->root);
}
/*
 * Initialize a topology descriptor.
 *
 * New flow (desc_init): parse the topology tree into desc->root, cache the
 * core count, NUMA-domain count and a core -> NUMA-domain lookup table, then
 * initialize the timestamp state and, when enable_alloc is non-zero, the
 * per-domain allocator.  Each later step runs only if the previous ones left
 * rc == 0.  On any failure the tree is freed before returning.
 *
 * Returns 0 on success, otherwise the non-zero code of the failing step
 * (ENOMEM when the core count exceeds TOPO_MAX_CHILDREN).
 *
 * NOTE(review): this span interleaves the removed _topo_init() lines with the
 * added desc_init() lines, so brace nesting cannot be read linearly.
 * NOTE(review): the failure path frees desc->root without resetting it to
 * NULL, and tobj_populate() also frees *out on its own error path -- looks
 * like the tree could be freed twice; confirm.
 */
static int
_topo_init(int verbose, int alloc_init, struct topo_obj ** obj)
desc_init(struct topo_desc * desc, int enable_alloc, int verbose)
{
int rc = init(obj);
desc->root = NULL;
int rc = tobj_populate(&desc->root);
if (verbose) {
if (rc == 0) {
int num_cores = _topo_num_core(*obj);
fprintf(stdout, "libtopo: %d cores, %d numa domains detected.\n", num_cores, _topo_num_numa(*obj));
for (int i = 0; i < num_cores; i++) {
fprintf(stdout, "libtopo: core #%d @ numa domain %d.\n", i, _topo_core_to_numa(*obj, i));
// cache the counts once so the public getters do not have to walk the tree
desc->num_core = tobj_get_num_core(desc->root);
desc->num_numa = tobj_get_num_numa(desc->root);
// the lookup table has TOPO_MAX_CHILDREN slots; refuse larger machines
if (desc->num_core > TOPO_MAX_CHILDREN) {
fprintf(stderr, "libtopo: too many cores - %d\n", desc->num_core);
rc = ENOMEM;
} else {
// precompute the NUMA domain of every core id
for (int i = 0; i < desc->num_core; i++) {
desc->core_to_numa_lookup[i] = tobj_core_to_numa(desc->root, i);
}
if (verbose) {
fprintf(stdout, "libtopo: %d cores, %d numa domains detected.\n", desc->num_core, desc->num_numa);
for (int i = 0; i < desc->num_core; i++) {
fprintf(stdout, "libtopo: core #%d @ numa domain %d.\n", i, desc->core_to_numa_lookup[i]);
}
fprintf(stdout, "libtopo: parsed tree:\n");
preorder_dump(*obj, 1);
tobj_preorder_dump(desc->root, 1);
}
}
} else {
fprintf(stderr, "libtopo: failed to parse topo: %d\n", errno);
}
if (rc == 0) {
rc = topo_ts_init(desc, verbose);
}
rc = topo_ts_init(verbose);
if (enable_alloc && rc == 0) {
rc = topo_alloc_init(desc, verbose);
}
if (alloc_init && rc == 0) {
rc = topo_alloc_init(verbose, *obj);
// any failure above: release the partially built tree
if (rc != 0) {
if (desc->root != NULL) {
tobj_free_root(desc->root);
}
}
return rc;
}
//
// public APIs
//
/*
 * Public getter for the number of cores.  The old variant computed it from
 * the global tree g_tobj; the new variant returns the value cached in the
 * default descriptor.
 */
int
topo_num_core()
{
return _topo_num_core(g_tobj);
return g_default_desc.num_core;
}
/*
 * Public getter for the number of NUMA domains.  The old variant computed it
 * from the global tree g_tobj; the new variant returns the value cached in
 * the default descriptor.
 */
int
topo_num_numa()
{
return _topo_num_numa(g_tobj);
return g_default_desc.num_numa;
}
/*
 * Map a core id to its NUMA domain.  The old variant searched the global
 * tree; the new variant indexes the lookup table cached in the default
 * descriptor.
 *
 * NOTE(review): the table lookup does not range-check coreid (negative or
 * >= the table size would index outside the array) -- confirm callers only
 * pass valid core ids.
 */
int
topo_core_to_numa(int coreid)
{
return _topo_core_to_numa(g_tobj, coreid);
return g_default_desc.core_to_numa_lookup[coreid];
}
static _Atomic(int) initialized = 0;
static volatile int init_rc = 0;
/*
 * Public teardown entry point.  The old variant destroyed the global tree
 * g_tobj; the new variant destroys the default descriptor.
 */
void
topo_destroy()
{
_topo_destroy(g_tobj);
desc_destroy(&g_default_desc);
}
/*
 * Public allocation entry point: forwards the request to the descriptor-based
 * allocator using the process-wide default descriptor.
 *
 * node: NUMA domain to allocate from.
 * size: number of bytes requested.
 * Returns whatever topo_desc_malloc() returns for the default descriptor.
 */
void *
topo_malloc(unsigned int node, size_t size)
{
    void * mem = topo_desc_malloc(&g_default_desc, node, size);

    return mem;
}
/*
 * Public release entry point: forwards to the descriptor-based free routine
 * using the process-wide default descriptor.
 *
 * node: NUMA domain the memory was allocated from.
 * addr: address previously returned by topo_malloc().
 *
 * topo_desc_free() is currently an empty placeholder, so this call has no
 * effect.
 */
void
topo_free(unsigned int node, void * addr)
{
    /*
     * topo_desc_free() returns void; invoke it as a plain statement.  ISO C
     * does not allow `return <expression>;` in a function returning void.
     */
    topo_desc_free(&g_default_desc, node, addr);
}
/*
 * Public timestamp entry point: returns the nanosecond value produced by
 * topo_desc_uptime_ns() for the process-wide default descriptor.
 */
uint64_t
topo_uptime_ns()
{
    const uint64_t now_ns = topo_desc_uptime_ns(&g_default_desc);

    return now_ns;
}
int
topo_init(int verbose, int alloc_init)
topo_init(int enable_alloc, int verbose)
{
int expected = 0;
if (atomic_compare_exchange_strong(&initialized, &expected, 2)) {
init_rc = _topo_init(verbose, alloc_init, &g_tobj);
init_rc = desc_init(&g_default_desc, enable_alloc, verbose);
atomic_store(&initialized, 1);
} else {
while(atomic_load(&initialized) != 1) {} // wait for init

42
topop.h Normal file
View File

@ -0,0 +1,42 @@
#pragma once
#include <stdint.h>
#include <pthread.h>
// capacity of the per-descriptor core -> NUMA lookup table
#define TOPO_MAX_CHILDREN (256)
// first dimension of the allocator arrays, which are indexed by NUMA domain id
#define ALLOC_MAX_OBJS_PER_LVL (256)
#define ALLOC_MEM_OBJ_SIZE (4096) // allocation granularity: 4 KiB per object
#define ALLOC_MEM_OBJ_NUM (1024 * 256) // objects per region: 4 KiB * 1024 * 256 = 1 GiB per region
#define ALLOC_MEM_REGION_NUM (4) // regions per NUMA domain: 4 x 1 GiB = 4 GiB per domain
/*
 * All state of one topology instance: the parsed topology tree with cached
 * lookups, the TSC frequency used for timestamps, and the per-NUMA-domain
 * allocator.
 */
struct topo_desc {
// root of the parsed topology tree
struct topo_obj * root;
// NUMA domain of each core id, filled for ids 0 .. num_core - 1 at init
int core_to_numa_lookup[TOPO_MAX_CHILDREN];
// number of cores found in the tree
int num_core;
// number of NUMA domains found in the tree
int num_numa;
// ts
// TSC frequency as reported by the machdep.tsc_freq sysctl
uint64_t tsc_freq;
// alloc
// taken by the allocation routine while it reads and advances the cursors below
pthread_mutex_t alloc_lock;
// per NUMA domain: index of the next unused object inside the current region
int mem_idx[ALLOC_MAX_OBJS_PER_LVL];
// per NUMA domain: index of the region currently being carved
int mem_region_idx[ALLOC_MAX_OBJS_PER_LVL];
// per NUMA domain: base addresses of the mmap'ed regions
void* mem_regions[ALLOC_MAX_OBJS_PER_LVL][ALLOC_MEM_REGION_NUM];
};
// Set up the allocator state of `desc` (mutex, per-domain memory regions and cursors).
// Returns 0 on success, non-zero on failure; `verbose` enables progress output.
int
topo_alloc_init(struct topo_desc * desc, int verbose);
// Query the TSC frequency via sysctl and store it in desc->tsc_freq.
int
topo_ts_init(struct topo_desc *desc, int verbose);
// Current TSC reading converted to nanoseconds using desc->tsc_freq.
uint64_t
topo_desc_uptime_ns(struct topo_desc * desc);
// Reserve `size` bytes from the regions of NUMA domain `node`; NULL if it does not fit.
void *
topo_desc_malloc(struct topo_desc * desc, unsigned int node, size_t size);
// Placeholder: the implementation is empty and ignores all arguments.
void
topo_desc_free(struct topo_desc * desc __attribute__((unused)), unsigned int node __attribute__((unused)), void * addr __attribute__((unused)));