intial commit

This commit is contained in:
quackerd 2022-05-27 00:25:30 +08:00
parent 2020770232
commit e7ffcb4340
7 changed files with 639 additions and 0 deletions

2
.gitignore vendored
View File

@ -2,6 +2,8 @@
# Prerequisites # Prerequisites
*.d *.d
compile_commands.json
# Object files # Object files
*.o *.o
*.ko *.ko

28
CMakeLists.txt Normal file
View File

@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.0)
project(libtopo)
find_package(PkgConfig REQUIRED)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
pkg_check_modules(XML libxml-2.0 REQUIRED)
set(C_FLAGS -O2 -g -Wall -Wextra -Werror -std=c17
-Wno-deprecated-declarations
-Wno-address-of-packed-member
-Wno-zero-length-array
-Wno-gnu-zero-variadic-macro-arguments
-march=native)
include_directories(${CMAKE_SOURCE_DIR}/inc)
add_library(topo SHARED topo.c alloc.c timestamp.c)
target_compile_options(topo PRIVATE ${C_FLAGS} ${XML_CFLAGS})
target_include_directories(topo PRIVATE ${XML_INCLUDE_DIRS})
target_link_libraries(topo PRIVATE ${XML_LINK_LIBRARIES})
add_executable(test test/topo.c)
set_target_properties(test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test)
target_link_libraries(test PRIVATE topo)
target_compile_options(test PRIVATE ${C_FLAGS})
install(TARGETS topo DESTINATION lib/libtopo)
install(FILES inc/topo.h DESTINATION include/libtopo)

116
alloc.c Normal file
View File

@ -0,0 +1,116 @@
#include <pthread.h>
#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <sys/thr.h>
#include <sys/mman.h>
#include <stdio.h>
#include <errno.h>
#include "topo.h"
static pthread_mutex_t alloc_lock;
#define NM_MAX_OBJS_PER_LVL (256)
#define MEM_OBJ_SIZE (4096) // 4k
#define MEM_OBJ_NUM (1024 * 256) // 4k * 1024 * 256 = 1GB per region
#define MEM_REGION_NUM (4) // 4 x 1GB = 4GB total
static int nm_mem_idx[NM_MAX_OBJS_PER_LVL];
static int nm_mem_region_idx[NM_MAX_OBJS_PER_LVL];
static void* nm_mem_regions[NM_MAX_OBJS_PER_LVL][MEM_REGION_NUM];
struct topo_obj;
int
topo_alloc_init(int verbose, struct topo_obj * tobj)
{
long tid;
thr_self(&tid);
domainset_t orig_dom;
int orig_policy;
pthread_mutex_init(&alloc_lock, NULL);
DOMAINSET_ZERO(&orig_dom);
// save existing thread's allocation strategy
int ret = cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(orig_dom), &orig_dom, &orig_policy);
if (ret != 0) {
return ret;
}
domainset_t tmp_domain;
for (int i = 0; i < topo_num_numa(tobj); i++) {
DOMAINSET_ZERO(&tmp_domain);
DOMAINSET_SET(i, &tmp_domain);
ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(tmp_domain), &tmp_domain, DOMAINSET_POLICY_PREFER);
if (ret != 0) {
if (verbose) {
fprintf(stderr, "libnm: cpuset_setdomain failed with %d\n", errno);
}
return ret;
}
for (unsigned int j = 0; j < MEM_REGION_NUM; j++) {
if ((nm_mem_regions[i][j] = mmap(NULL, MEM_OBJ_NUM * MEM_OBJ_SIZE, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE | MAP_NOSYNC,
-1, 0)) == MAP_FAILED) {
if (verbose) {
fprintf(stderr, "libnm: mmap failed with %d\n", errno);
}
return -1;
}
// touch the pages to prefault the pages
for (unsigned int k = 0; k < MEM_OBJ_NUM; k++) {
*(uint32_t*)((char*)nm_mem_regions[i][j] + k * MEM_OBJ_SIZE) = 0;
}
if (verbose) {
fprintf(stdout, "libnm: reserved %u bytes (%u MB) on node %d. vaddr: 0x%p\n", MEM_OBJ_NUM * MEM_OBJ_SIZE, MEM_OBJ_SIZE * MEM_OBJ_NUM / 1024 / 1024, i, nm_mem_regions[i][j]);
}
}
nm_mem_idx[i] = 0;
nm_mem_region_idx[i] = 0;
}
// restore existing thread's allocation strategy
ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(orig_dom), &orig_dom, orig_policy);
return ret;
}
void *
topo_malloc(unsigned int node, size_t size)
{
void * ret = NULL;
int num_objs = (size + MEM_OBJ_SIZE - 1) / MEM_OBJ_SIZE;
int retry = 0;
pthread_mutex_lock(&alloc_lock);
int cur_region = nm_mem_region_idx[node];
int cur_idx = nm_mem_idx[node];
retry:
if ((int)MEM_OBJ_NUM - cur_idx >= num_objs) {
ret = (char*)nm_mem_regions[node][cur_region] + MEM_OBJ_SIZE * cur_idx;
nm_mem_region_idx[node] = cur_region;
nm_mem_idx[node] = cur_idx + num_objs;
} else if (!retry && (cur_region < (int)MEM_REGION_NUM)) {
// check next region
cur_region++;
cur_idx = 0;
retry = 1;
goto retry;
}
pthread_mutex_unlock(&alloc_lock);
return ret;
}
void
topo_free(unsigned int node __attribute__((unused)), void * addr __attribute__((unused)))
{
// dummy function
}

35
inc/topo.h Normal file
View File

@ -0,0 +1,35 @@
#pragma once
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
int
topo_num_core();
int
topo_num_numa();
int
topo_core_to_numa(int coreid);
void
topo_destroy();
int
topo_init(int verbose, int alloc_init);
void *
topo_malloc(unsigned int node, size_t size);
void
topo_free(unsigned int node, void * addr);
uint64_t
topo_uptime_ns();
#ifdef __cplusplus
} // extern "C"
#endif

8
test/topo.c Normal file
View File

@ -0,0 +1,8 @@
#include <unistd.h>
#include "topo.h"
int main()
{
topo_init(1, 1);
return 0;
}

26
timestamp.c Normal file
View File

@ -0,0 +1,26 @@
#include <stdint.h>
#include <immintrin.h>
#include <x86intrin.h>
#include "topo.h"
static uint64_t sysctl_tsc_freq = 0;
#define S2NS (1000000000UL)
static uint64_t
tsc2ns(uint64_t tsc)
{
return (uint64_t)(
(double)tsc / (double)sysctl_tsc_freq * S2NS);
}
uint64_t
topo_uptime_ns()
{
unsigned int dummy;
_mm_lfence();
uint64_t tsc = __rdtscp(&dummy);
_mm_lfence();
return tsc2ns(tsc);
}

424
topo.c Normal file
View File

@ -0,0 +1,424 @@
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/_cpuset.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include "libxml/parser.h"
#include "libxml/tree.h"
#include "libxml/xmlmemory.h"
#include "libxml/xmlversion.h"
#include "topo.h"
#define TOPO_MAX_CHILDREN (256)
#define TOPO_INVALID (-1)
#define TOPO_CACHE_L2 (2)
#define TOPO_CACHE_L3 (3)
#define TOPO_CACHE_NONE (0)
#define TOPO_FLAG_NONE (0)
#define TOPO_FLAG_NUMA (0x4)
#define TOPO_FLAG_HTT (0x1)
#define TOPO_FLAG_SMT (0x2)
#define TOPO_FLAG_THREAD (TOPO_FLAG_HTT | TOPO_FLAG_SMT)
struct topo_obj {
int cache_level;
cpuset_t mask;
int flags;
int num_children;
struct topo_obj * parent;
struct topo_obj * children[TOPO_MAX_CHILDREN];
};
extern int topo_alloc_init(int verbose, struct topo_obj * tobj);
extern int topo_ts_init(int verbose);
static struct topo_obj * g_tobj = NULL;
static void
mask_to_cpuset(const char * mask, cpuset_t * cset)
{
char * last;
char * copy = strdup(mask);
char * first = strtok_r(copy, ",", &last);
char tmp[2] = {0};
int qword = 0;
while (first != NULL) {
int shift = 0;
for (int i = strlen(first) - 1; i >= 0; i--) {
tmp[0] = first[i];
int val = strtol(tmp, NULL, 16);
int count = 0;
while (val > 0) {
int bit = val & 1;
if (bit == 1) {
CPU_SET(qword * 64 + shift + count, cset);
//printf("Setting bit %d to 1!\n", qword * 64 + shift + count);
}
val = val >> 1;
count++;
}
shift += 4;
}
first = strtok_r(NULL, ",", &last);
qword++;
}
free(copy);
}
static void
preorder_dump(struct topo_obj * root, int indent) {
cpuset_t tmpset;
CPU_COPY(&root->mask, &tmpset);
const char * flag_str;
switch(root->flags) {
case TOPO_FLAG_NUMA:
flag_str = "NUMA";
break;
case TOPO_FLAG_THREAD:
flag_str = "THREAD";
break;
case TOPO_FLAG_HTT:
flag_str = "HTT";
break;
case TOPO_FLAG_SMT:
flag_str = "SMT";
break;
default:
flag_str = "NONE";
}
printf("%*scache-level: %d, flag: %s, cores: ", indent * 4, "", root->cache_level, flag_str);
while (CPU_FFS(&tmpset) != 0) {
int u = CPU_FFS(&tmpset);
printf("%d ", u - 1);
CPU_CLR(u -1 , &tmpset);
}
printf("\n");
for (int i = 0; i < root->num_children; i++) {
preorder_dump(root->children[i], indent + 1);
}
}
static xmlAttr *
find_attr(xmlNode * root, const char * attr_name)
{
xmlAttr * attr = root->properties;
while (attr != NULL) {
if (strcmp((const char *)attr->name, attr_name) == 0) {
break;
}
attr = attr->next;
}
return attr;
}
static void
free_root(struct topo_obj * root)
{
if (root == NULL) {
return;
}
if (root->num_children > 0) {
for (int i = 0; i < root->num_children; i++) {
free_root(root->children[i]);
}
}
free(root);
}
static int
populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
{
int rc = 0;
if (root->type == XML_ELEMENT_NODE) {
if (strcmp((const char*)root->name, "group") == 0) {
// this is a group, then check cache-level
xmlAttr * attr = find_attr(root, "cache-level");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr cache-level in group.\n");
return -1;
}
xmlAttr * lvl_attr = find_attr(root, "level");
if (lvl_attr == NULL) {
fprintf(stderr, "libtopo: could not find attr level in group.\n");
return -1;
}
int level = atoi((const char *)lvl_attr->children->content);
int cache_level = atoi((const char *)attr->children->content);
struct topo_obj * tobj = malloc(sizeof(struct topo_obj));
tobj->cache_level = cache_level;
tobj->num_children = 0;
tobj->parent = parent;
tobj->flags = TOPO_FLAG_NONE;
CPU_ZERO(&tobj->mask);
if (level == 1) {
if (*out != NULL) {
fprintf(stderr, "libtopo: multiple level 1 group detected.\n");
free(tobj);
return -1;
}
*out = tobj;
} else {
if (parent == NULL) {
fprintf(stderr, "libtopo: level 1 group does not appear to be the outermost\n");
free(tobj);
return -1;
}
parent->children[parent->num_children] = tobj;
parent->num_children++;
}
parent = tobj;
} else if (strcmp((const char*)root->name, "cpu") == 0) {
xmlAttr * attr = find_attr(root, "mask");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr mask in cpu.\n");
return -1;
}
mask_to_cpuset((const char *)attr->children->content, &parent->mask);
} else if (strcmp((const char *)root->name, "flag") == 0) {
xmlAttr * attr = find_attr(root, "name");
if (attr == NULL) {
fprintf(stderr, "libtopo: could not find attr name in flag.\n");
return -1;
}
if (strcmp((const char *)attr->children->content, "THREAD") == 0) {
parent->flags |= TOPO_FLAG_THREAD;
} else if (strcmp((const char *)attr->children->content, "SMT") == 0) {
parent->flags |= TOPO_FLAG_SMT;
} else if (strcmp((const char *)attr->children->content, "HTT") == 0) {
parent->flags |= TOPO_FLAG_HTT;
} else if (strcmp((const char *)attr->children->content, "NODE") == 0) {
parent->flags |= TOPO_FLAG_NUMA;
}
}
xmlNode * child = root->children;
while (child != NULL) {
if ((rc = populate_root(child, parent, out)) != 0) {
break;
}
child = child->next;
}
}
return rc;
}
static int
init(struct topo_obj **out)
{
int rc;
size_t sz;
LIBXML_TEST_VERSION;
rc = sysctlbyname("kern.sched.topology_spec", NULL, &sz, NULL, 0);
if (rc != 0) {
return rc;
}
char * buf = malloc(sz);
if (buf == NULL) {
errno = ENOMEM;
return -1;
}
rc = sysctlbyname("kern.sched.topology_spec", buf, &sz, NULL, 0);
if (rc != 0) {
return rc;
}
//printf("xml:\n%s",buf);
xmlDoc * doc = xmlReadMemory(buf, sz, NULL, NULL, 0);
if (doc == NULL) {
errno = EINVAL;
return -1;
}
rc = populate_root(xmlDocGetRootElement(doc), NULL, out) != 0;
free(buf);
xmlFreeDoc(doc);
if (rc != 0) {
free_root(*out);
errno = EINVAL;
return rc;
}
return rc;
}
static struct topo_obj *
find_node_by_mask(struct topo_obj * root, cpuset_t * set)
{
if (!CPU_SUBSET(&root->mask, set)) {
return NULL;
}
struct topo_obj * best_fit;
for (int i = 0; i < root->num_children; i++) {
best_fit = find_node_by_mask(root->children[i], set);
if (best_fit != NULL) {
break;
}
}
return best_fit == NULL ? root : best_fit;
}
static struct topo_obj *
find_node_by_flag(struct topo_obj * root, int flag)
{
if (root->flags & flag) {
return root;
}
struct topo_obj * result;
for (int i = 0; i < root->num_children; i++) {
result = find_node_by_flag(root->children[i], flag);
if (result != NULL) {
return result;
}
}
return NULL;
}
static int
_topo_core_to_numa(struct topo_obj * root, int coreid)
{
cpuset_t set;
CPU_ZERO(&set);
CPU_SET(coreid, &set);
struct topo_obj * obj = find_node_by_mask(root, &set);
if (obj == NULL) {
return TOPO_INVALID;
}
while(obj->parent != NULL) {
obj = obj->parent;
if (obj->flags & TOPO_FLAG_NUMA) {
if (obj->parent != NULL) {
for (int i = 0; i < obj->parent->num_children; i++) {
if (obj->parent->children[i] == obj) {
return i;
}
}
}
break;
}
}
return 0;
}
static int
_topo_num_numa(struct topo_obj * root)
{
struct topo_obj * result = find_node_by_flag(root, TOPO_FLAG_NUMA);
if (result == NULL) {
return 1;
}
if (result->parent == NULL) {
return TOPO_INVALID;
}
return result->parent->num_children;
}
static int
_topo_num_core(struct topo_obj * root)
{
return CPU_COUNT(&root->mask);
}
static void
_topo_destroy(struct topo_obj * obj)
{
free_root(obj);
}
static int
_topo_init(int verbose, int alloc_init, struct topo_obj ** obj)
{
int rc = init(obj);
if (verbose) {
if (rc == 0) {
int num_cores = _topo_num_core(*obj);
fprintf(stdout, "libtopo: %d cores, %d numa domains detected.\n", num_cores, _topo_num_numa(*obj));
for (int i = 0; i < num_cores; i++) {
fprintf(stdout, "libtopo: core #%d @ numa domain %d.\n", i, _topo_core_to_numa(*obj, i));
}
fprintf(stdout, "libtopo: parsed tree:\n");
preorder_dump(*obj, 1);
} else {
fprintf(stderr, "libtopo: failed to parse topo: %d\n", errno);
}
}
if (alloc_init && rc == 0) {
rc = topo_alloc_init(verbose, *obj);
}
return rc;
}
int
topo_num_core()
{
return _topo_num_core(g_tobj);
}
int
topo_num_numa()
{
return _topo_num_numa(g_tobj);
}
int
topo_core_to_numa(int coreid)
{
return _topo_core_to_numa(g_tobj, coreid);
}
void
topo_destroy()
{
_topo_destroy(g_tobj);
}
int
topo_init(int verbose, int alloc_init)
{
return _topo_init(verbose, alloc_init, &g_tobj);
}