libtopo/topo.c

465 lines
11 KiB
C

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/_cpuset.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <stdatomic.h>
#include "libxml/parser.h"
#include "libxml/tree.h"
#include "libxml/xmlmemory.h"
#include "libxml/xmlversion.h"
#include "topo.h"
#include "topop.h"
// Sentinel returned by tobj_core_to_numa() when no tree node matches the core.
#define TOPO_INVALID (-1)
// Cache-level constants.
// NOTE(review): not referenced in the visible part of this file; presumably
// meant to be compared against topo_obj.cache_level - confirm.
#define TOPO_CACHE_L2 (2)
#define TOPO_CACHE_L3 (3)
#define TOPO_CACHE_NONE (0)
// Bit flags stored in topo_obj.flags. tobj_populate_root() ORs them in when it
// meets a "flag" element named "NODE" (NUMA), "HTT", "SMT" or "THREAD".
#define TOPO_FLAG_NONE (0)
#define TOPO_FLAG_NUMA (0x4)
#define TOPO_FLAG_HTT (0x1)
#define TOPO_FLAG_SMT (0x2)
// "THREAD" sets both the HTT and the SMT bit.
#define TOPO_FLAG_THREAD (TOPO_FLAG_HTT | TOPO_FLAG_SMT)
// One node of the parsed topology tree; built by tobj_populate_root() from a
// "group" element and released by tobj_free_root().
struct topo_obj {
// value of the group's "cache-level" attribute
int cache_level;
// CPUs collected from the "mask" attribute of "cpu" elements under this group
cpuset_t mask;
// bitwise OR of TOPO_FLAG_* values
int flags;
// value of the group's "level" attribute; the level 1 group becomes the tree root
int level;
// number of valid entries in children[]
int num_children;
// enclosing group, or NULL when the group has no enclosing group
struct topo_obj * parent;
// child groups, in the order they were encountered while parsing
struct topo_obj * children[TOPO_MAX_CHILDREN];
};
// Process-wide descriptor used by all public topo_* functions in this file.
static struct topo_desc g_default_desc = {0};
// Initialization state used by topo_init():
//   0 - not started, 2 - a caller is running desc_init(), 1 - finished.
static _Atomic(int) initialized = 0;
// Result of desc_init() as stored by the topo_init() caller that performed the
// initialization; topo_init() returns it to every caller. It is written before
// `initialized` is set to 1 and read only after `initialized` was seen as 1.
static volatile int init_rc = 0;
// Parse a comma-separated list of hexadecimal words and set the matching CPUs
// in `cset`.
//
// Each comma-separated token stands for one 64-bit word: the first token covers
// CPUs 0-63, the second 64-127, and so on. Within a token the rightmost hex
// digit holds the lowest four bits. Characters that are not hex digits
// contribute no bits. Bits already set in `cset` are left untouched; the
// function only adds CPUs.
//
// mask: NUL-terminated mask string; not modified (a private copy is tokenized).
// cset: set to update.
//
// Bits that would fall outside CPU_SETSIZE are dropped with a warning instead
// of being written past the end of the set. If the private copy cannot be
// allocated the set is left unchanged.
static void
mask_to_cpuset(const char * mask, cpuset_t * cset)
{
    if (mask == NULL || cset == NULL) {
        return;
    }

    // strtok_r() writes into its input, so work on a copy.
    char * copy = strdup(mask);
    if (copy == NULL) {
        fprintf(stderr, "libtopo: out of memory while parsing cpu mask.\n");
        return;
    }

    int truncated = 0;
    int word = 0;
    char * save = NULL;
    for (char * tok = strtok_r(copy, ",", &save); tok != NULL;
        tok = strtok_r(NULL, ",", &save), word++) {
        int shift = 0;
        // Walk the token from its least significant (rightmost) digit.
        for (int i = (int)strlen(tok) - 1; i >= 0; i--, shift += 4) {
            const char digit[2] = { tok[i], '\0' };
            long nibble = strtol(digit, NULL, 16);
            for (int bit = 0; nibble > 0; bit++, nibble >>= 1) {
                if ((nibble & 1) == 0) {
                    continue;
                }
                int cpu = word * 64 + shift + bit;
                if (cpu >= CPU_SETSIZE) {
                    // CPU_SET() does not range-check its index.
                    truncated = 1;
                    continue;
                }
                CPU_SET(cpu, cset);
            }
        }
    }

    if (truncated) {
        fprintf(stderr, "libtopo: cpu mask exceeds CPU_SETSIZE, extra bits ignored.\n");
    }
    free(copy);
}
// Print `root` and then each of its children (pre-order) to stdout, one group
// per line. Every line is indented by `indent` * 4 spaces and children are
// printed one indent step deeper. CPUs are listed as zero-based indices in
// ascending order.
//
// The flag label is chosen by exact match of root->flags against a single
// TOPO_FLAG_* value; any other value (including combinations other than
// TOPO_FLAG_THREAD) is shown as "NONE".
static void
tobj_preorder_dump(struct topo_obj * root, int indent) {
    // Work on a copy: listing the CPUs clears bits as it goes.
    cpuset_t remaining;
    CPU_COPY(&root->mask, &remaining);

    const char * label = "NONE";
    if (root->flags == TOPO_FLAG_NUMA) {
        label = "NUMA";
    } else if (root->flags == TOPO_FLAG_THREAD) {
        label = "THREAD";
    } else if (root->flags == TOPO_FLAG_HTT) {
        label = "HTT";
    } else if (root->flags == TOPO_FLAG_SMT) {
        label = "SMT";
    }

    printf("%*sgroup level: %d, cache-level: %d, flag: %s, cores: ", indent * 4, "", root->level, root->cache_level, label);

    // CPU_FFS() yields a 1-based position, or 0 once the set is empty.
    for (int pos = (int)CPU_FFS(&remaining); pos != 0; pos = (int)CPU_FFS(&remaining)) {
        printf("%d ", pos - 1);
        CPU_CLR(pos - 1, &remaining);
    }
    printf("\n");

    int child_indent = indent + 1;
    int idx = 0;
    while (idx < root->num_children) {
        tobj_preorder_dump(root->children[idx], child_indent);
        idx++;
    }
}
// Look up an attribute of `root` by exact (case-sensitive) name.
//
// Returns the first attribute in root->properties whose name equals
// `attr_name`, or NULL when the element has no such attribute.
static xmlAttr *
xml_find_attr(xmlNode * root, const char * attr_name)
{
    for (xmlAttr * cur = root->properties; cur != NULL; cur = cur->next) {
        if (strcmp((const char *)cur->name, attr_name) == 0) {
            return cur;
        }
    }
    return NULL;
}
// Release `root` and every node below it. Children are freed before their
// parent. Passing NULL is a no-op.
static void
tobj_free_root(struct topo_obj * root)
{
    if (root != NULL) {
        int idx = 0;
        while (idx < root->num_children) {
            tobj_free_root(root->children[idx]);
            idx++;
        }
        free(root);
    }
}
// Return the text of attribute `attr_name` on `node`, or NULL when the
// attribute is missing or has no text content (e.g. an empty value).
static const char *
tobj_xml_attr_value(xmlNode * node, const char * attr_name)
{
    xmlAttr * attr = xml_find_attr(node, attr_name);
    if (attr == NULL || attr->children == NULL || attr->children->content == NULL) {
        return NULL;
    }
    return (const char *)attr->children->content;
}

// Recursively convert an XML subtree into topo_obj nodes.
//
// Element handling:
//   "group" - allocates a node from the "cache-level" and "level" attributes.
//             A level 1 group is stored in *out as the tree root (only one is
//             allowed); any other group is appended to `parent`. The new node
//             becomes the parent for everything nested inside the element.
//   "cpu"   - merges the "mask" attribute into the enclosing group's CPU set.
//   "flag"  - ORs the flag selected by the "name" attribute ("THREAD", "SMT",
//             "HTT", "NODE") into the enclosing group; other names are ignored.
// Other elements are only traversed; non-element nodes are skipped.
//
// root:   XML node to process.
// parent: group enclosing `root`, or NULL when there is none.
// out:    receives the level 1 group; *out must be NULL on the initial call.
//
// Returns 0 on success and -1 on malformed input or allocation failure. On
// failure, nodes already linked under *out stay linked; the caller releases
// them with tobj_free_root(*out).
static int
tobj_populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out)
{
    if (root == NULL || out == NULL) {
        return -1;
    }
    if (root->type != XML_ELEMENT_NODE) {
        return 0;
    }

    const char * elem = (const char *)root->name;

    if (strcmp(elem, "group") == 0) {
        const char * cache_str = tobj_xml_attr_value(root, "cache-level");
        if (cache_str == NULL) {
            fprintf(stderr, "libtopo: could not find attr cache-level in group.\n");
            return -1;
        }
        const char * level_str = tobj_xml_attr_value(root, "level");
        if (level_str == NULL) {
            fprintf(stderr, "libtopo: could not find attr level in group.\n");
            return -1;
        }

        struct topo_obj * tobj = calloc(1, sizeof *tobj);
        if (tobj == NULL) {
            fprintf(stderr, "libtopo: out of memory while building topology tree.\n");
            return -1;
        }
        tobj->cache_level = atoi(cache_str);
        tobj->level = atoi(level_str);
        tobj->num_children = 0;
        tobj->parent = parent;
        tobj->flags = TOPO_FLAG_NONE;
        CPU_ZERO(&tobj->mask);

        if (tobj->level == 1) {
            if (*out != NULL) {
                fprintf(stderr, "libtopo: multiple level 1 group detected.\n");
                free(tobj);
                return -1;
            }
            *out = tobj;
        } else {
            if (parent == NULL) {
                fprintf(stderr, "libtopo: level 1 group does not appear to be the outermost\n");
                free(tobj);
                return -1;
            }
            // Capacity is taken from the array itself so the check cannot
            // drift from the struct definition.
            int capacity = (int)(sizeof parent->children / sizeof parent->children[0]);
            if (parent->num_children >= capacity) {
                fprintf(stderr, "libtopo: too many child groups in one group.\n");
                free(tobj);
                return -1;
            }
            parent->children[parent->num_children] = tobj;
            parent->num_children++;
        }
        parent = tobj;
    } else if (strcmp(elem, "cpu") == 0) {
        const char * mask = tobj_xml_attr_value(root, "mask");
        if (mask == NULL) {
            fprintf(stderr, "libtopo: could not find attr mask in cpu.\n");
            return -1;
        }
        if (parent == NULL) {
            fprintf(stderr, "libtopo: cpu element found outside of any group.\n");
            return -1;
        }
        mask_to_cpuset(mask, &parent->mask);
    } else if (strcmp(elem, "flag") == 0) {
        const char * flag_name = tobj_xml_attr_value(root, "name");
        if (flag_name == NULL) {
            fprintf(stderr, "libtopo: could not find attr name in flag.\n");
            return -1;
        }
        if (parent == NULL) {
            fprintf(stderr, "libtopo: flag element found outside of any group.\n");
            return -1;
        }
        if (strcmp(flag_name, "THREAD") == 0) {
            parent->flags |= TOPO_FLAG_THREAD;
        } else if (strcmp(flag_name, "SMT") == 0) {
            parent->flags |= TOPO_FLAG_SMT;
        } else if (strcmp(flag_name, "HTT") == 0) {
            parent->flags |= TOPO_FLAG_HTT;
        } else if (strcmp(flag_name, "NODE") == 0) {
            parent->flags |= TOPO_FLAG_NUMA;
        }
    }

    // Descend into nested elements; stop at the first failure.
    int rc = 0;
    for (xmlNode * child = root->children; child != NULL; child = child->next) {
        rc = tobj_populate_root(child, parent, out);
        if (rc != 0) {
            break;
        }
    }
    return rc;
}
// Read the scheduler topology description from the
// "kern.sched.topology_spec" sysctl, parse it as XML and build the
// corresponding topo_obj tree.
//
// out: receives the root of the new tree on success. It is reset to NULL on
//      entry and is NULL again on every failure path, so the caller never
//      sees a pointer to freed memory.
//
// Returns 0 on success. On failure returns -1 with errno set: the value left
// by sysctlbyname(), ENOMEM when the buffer cannot be allocated, or EINVAL
// when the text cannot be parsed or contains no level 1 group.
static int
tobj_populate(struct topo_obj **out)
{
    static const char spec_name[] = "kern.sched.topology_spec";
    size_t sz = 0;

    *out = NULL;

    LIBXML_TEST_VERSION;

    // First call only asks for the required buffer size.
    int rc = sysctlbyname(spec_name, NULL, &sz, NULL, 0);
    if (rc != 0) {
        return rc;
    }
    if (sz == 0) {
        errno = EINVAL;
        return -1;
    }

    char * buf = malloc(sz);
    if (buf == NULL) {
        errno = ENOMEM;
        return -1;
    }

    rc = sysctlbyname(spec_name, buf, &sz, NULL, 0);
    if (rc != 0) {
        // Preserve the sysctl error across free().
        int saved_errno = errno;
        free(buf);
        errno = saved_errno;
        return rc;
    }

    //printf("xml:\n%s",buf);
    xmlDoc * doc = xmlReadMemory(buf, (int)sz, NULL, NULL, 0);
    if (doc == NULL) {
        free(buf);
        errno = EINVAL;
        return -1;
    }

    xmlNode * top = xmlDocGetRootElement(doc);
    rc = (top != NULL) ? tobj_populate_root(top, NULL, out) : -1;
    if (rc == 0 && *out == NULL) {
        // Well-formed XML without a level 1 group gives no usable tree.
        rc = -1;
    }

    free(buf);
    xmlFreeDoc(doc);

    if (rc != 0) {
        tobj_free_root(*out);
        *out = NULL;
        errno = EINVAL;
        return -1;
    }
    return 0;
}
// Find the deepest node under `root` for which
// CPU_SUBSET(&node->mask, set) holds.
//
// Children are tried in order and the search descends into the first child
// that matches. Returns NULL when `root` itself does not match.
static struct topo_obj *
tobj_find_leaf_node_by_mask(struct topo_obj * root, cpuset_t * set)
{
    if (CPU_SUBSET(&root->mask, set)) {
        for (int k = 0; k < root->num_children; k++) {
            struct topo_obj * deeper = tobj_find_leaf_node_by_mask(root->children[k], set);
            if (deeper != NULL) {
                return deeper;
            }
        }
        // No child matches, so this node is the tightest fit.
        return root;
    }
    return NULL;
}
// Depth-first, pre-order search for the first node whose flags share at least
// one bit with `flag`. Returns that node, or NULL when none matches.
__attribute__((unused))
static struct topo_obj *
tobj_find_node_by_flag(struct topo_obj * root, int flag)
{
    if ((root->flags & flag) != 0) {
        return root;
    }
    struct topo_obj * hit = NULL;
    for (int idx = 0; hit == NULL && idx < root->num_children; idx++) {
        hit = tobj_find_node_by_flag(root->children[idx], flag);
    }
    return hit;
}
// Return 1 when `obj` is a level 1 group whose cache level is 0, which this
// file treats as the node whose children are the NUMA domains; 0 otherwise.
static inline int
tobj_is_node_numa(struct topo_obj * obj)
{
    if (obj->level != 1) {
        return 0;
    }
    return obj->cache_level == 0;
}
// Map a core id to its NUMA domain index.
//
// Finds the deepest node containing `coreid`, then walks towards the root.
// When an ancestor qualifies per tobj_is_node_numa(), the result is the index
// of the branch leading to the core within that ancestor's children.
//
// Returns TOPO_INVALID when no node contains the core, and 0 when no ancestor
// qualifies as a NUMA node.
static int
tobj_core_to_numa(struct topo_obj * root, int coreid)
{
    cpuset_t single;
    CPU_ZERO(&single);
    CPU_SET(coreid, &single);

    struct topo_obj * node = tobj_find_leaf_node_by_mask(root, &single);
    if (node == NULL) {
        return TOPO_INVALID;
    }

    // `node` is the branch we came from, `up` the ancestor being examined.
    for (struct topo_obj * up = node->parent; up != NULL; node = up, up = up->parent) {
        if (!tobj_is_node_numa(up)) {
            continue;
        }
        for (int slot = 0; slot < up->num_children; slot++) {
            if (up->children[slot] == node) {
                return slot;
            }
        }
    }
    return 0;
}
// Count the NUMA domains of the tree that `root` belongs to.
//
// Climbs to the topmost node; if it qualifies per tobj_is_node_numa() and has
// children, the number of children is returned. Otherwise the result is 1.
static int
tobj_get_num_numa(struct topo_obj * root)
{
    struct topo_obj * top = root;
    while (top->parent != NULL) {
        top = top->parent;
    }

    int domains = 1;
    if (tobj_is_node_numa(top) && top->num_children > 0) {
        domains = top->num_children;
    }
    return domains;
}
// Return the number of CPUs set in root->mask.
static int
tobj_get_num_core(struct topo_obj * root)
{
    int total = CPU_COUNT(&root->mask);
    return total;
}
// Release the topology tree owned by `desc`.
//
// desc->root is reset to NULL afterwards so that a repeated call, or a call
// after a failed initialization, does not free the same tree twice.
// Passing NULL is a no-op.
static void
desc_destroy(struct topo_desc * desc)
{
    if (desc == NULL) {
        return;
    }
    tobj_free_root(desc->root);
    desc->root = NULL;
}
// Build the topology tree for `desc` and derive the core count, NUMA domain
// count and core -> NUMA lookup table from it, then call topo_ts_init().
//
// desc:    descriptor to fill in; any previous desc->root value is overwritten.
// verbose: when non-zero, print the detected layout and the parsed tree to
//          stdout.
//
// Returns 0 on success. On failure returns a non-zero value (the result of
// tobj_populate() or topo_ts_init(), or ENOMEM when there are more cores than
// TOPO_MAX_CHILDREN) and leaves desc->root set to NULL.
//
// NOTE(review): the lookup table is filled for core ids 0 .. num_core - 1,
// which assumes the ids present in the root mask are contiguous from 0.
static int
desc_init(struct topo_desc * desc, int verbose)
{
    desc->root = NULL;

    int rc = tobj_populate(&desc->root);
    if (rc != 0) {
        fprintf(stderr, "libtopo: failed to parse topo: %d\n", errno);
        // tobj_populate() releases whatever it built before reporting a
        // failure, so the pointer must be dropped here, not freed again.
        desc->root = NULL;
        return rc;
    }
    if (desc->root == NULL) {
        // Parsing succeeded but produced no root group; nothing to describe.
        errno = EINVAL;
        fprintf(stderr, "libtopo: failed to parse topo: %d\n", errno);
        return -1;
    }

    desc->num_core = tobj_get_num_core(desc->root);
    desc->num_numa = tobj_get_num_numa(desc->root);

    if (desc->num_core > TOPO_MAX_CHILDREN) {
        fprintf(stderr, "libtopo: too many cores - %d\n", desc->num_core);
        rc = ENOMEM;
    } else {
        for (int i = 0; i < desc->num_core; i++) {
            desc->core_to_numa_lookup[i] = tobj_core_to_numa(desc->root, i);
        }
        if (verbose) {
            fprintf(stdout, "libtopo: %d cores, %d numa domains detected.\n", desc->num_core, desc->num_numa);
            for (int i = 0; i < desc->num_core; i++) {
                fprintf(stdout, "libtopo: core #%d @ numa domain %d.\n", i, desc->core_to_numa_lookup[i]);
            }
            fprintf(stdout, "libtopo: parsed tree:\n");
            tobj_preorder_dump(desc->root, 1);
        }
        rc = topo_ts_init(desc, verbose);
    }

    if (rc != 0) {
        tobj_free_root(desc->root);
        // Avoid a dangling pointer that a later destroy would free again.
        desc->root = NULL;
    }
    return rc;
}
//
// public APIs
//
int
topo_num_core(void)
{
return g_default_desc.num_core;
}
int
topo_num_numa(void)
{
return g_default_desc.num_numa;
}
// Return the NUMA domain index recorded for core `coreid` in the default
// descriptor.
//
// Returns -1 (the value of TOPO_INVALID) when `coreid` is negative or not
// below the recorded core count, instead of reading outside the lookup table.
// Because the core count is 0 before initialization, every id is rejected
// until topo_init() has run.
//
// NOTE(review): assumes the lookup table holds at least num_core entries;
// confirm against the struct topo_desc definition.
int
topo_core_to_numa(int coreid)
{
    if (coreid < 0 || coreid >= g_default_desc.num_core) {
        return -1; // same value as TOPO_INVALID
    }
    return g_default_desc.core_to_numa_lookup[coreid];
}
// Release the resources held by the default descriptor via desc_destroy().
// The initialization state tracked by topo_init() is not reset here.
void
topo_destroy(void)
{
    struct topo_desc * desc = &g_default_desc;
    desc_destroy(desc);
}
// Return topo_desc_uptime_ns() evaluated on the default descriptor.
// NOTE(review): presumably an uptime in nanoseconds, judging by the name;
// confirm against topo_desc_uptime_ns().
uint64_t
topo_uptime_ns(void)
{
    uint64_t now_ns = topo_desc_uptime_ns(&g_default_desc);
    return now_ns;
}
// Initialize the default descriptor exactly once and return the result.
//
// The first caller moves `initialized` from 0 to 2, runs desc_init(), stores
// its result in init_rc and then sets `initialized` to 1. Every other caller
// busy-waits until `initialized` reads 1 and returns the stored result; with
// `verbose` set it also reports that initialization already happened.
//
// verbose: passed on to desc_init() by the initializing caller.
//
// Returns the value desc_init() produced (0 on success).
int
topo_init(int verbose)
{
    enum { STATE_IDLE = 0, STATE_DONE = 1, STATE_RUNNING = 2 };

    int seen = STATE_IDLE;
    if (!atomic_compare_exchange_strong(&initialized, &seen, STATE_RUNNING)) {
        // Someone else got there first; spin until they have finished.
        while (atomic_load(&initialized) != STATE_DONE) {
        }
        if (verbose) {
            fprintf(stdout, "libtopo: already initialized with rc = %d\n", init_rc);
        }
        return init_rc;
    }

    init_rc = desc_init(&g_default_desc, verbose);
    atomic_store(&initialized, STATE_DONE);
    return init_rc;
}