#include #include #include #include #include #include #include #include #include #include #include "libxml/parser.h" #include "libxml/tree.h" #include "libxml/xmlmemory.h" #include "libxml/xmlversion.h" #include "topo.h" #include "topop.h" #define TOPO_INVALID (-1) #define TOPO_CACHE_L2 (2) #define TOPO_CACHE_L3 (3) #define TOPO_CACHE_NONE (0) #define TOPO_FLAG_NONE (0) #define TOPO_FLAG_NUMA (0x4) #define TOPO_FLAG_HTT (0x1) #define TOPO_FLAG_SMT (0x2) #define TOPO_FLAG_THREAD (TOPO_FLAG_HTT | TOPO_FLAG_SMT) struct topo_obj { int cache_level; cpuset_t mask; int flags; int num_children; struct topo_obj * parent; struct topo_obj * children[TOPO_MAX_CHILDREN]; }; static struct topo_desc g_default_desc = {0}; static _Atomic(int) initialized = 0; static volatile int init_rc = 0; static void mask_to_cpuset(const char * mask, cpuset_t * cset) { char * last; char * copy = strdup(mask); char * first = strtok_r(copy, ",", &last); char tmp[2] = {0}; int qword = 0; while (first != NULL) { int shift = 0; for (int i = strlen(first) - 1; i >= 0; i--) { tmp[0] = first[i]; int val = strtol(tmp, NULL, 16); int count = 0; while (val > 0) { int bit = val & 1; if (bit == 1) { CPU_SET(qword * 64 + shift + count, cset); //printf("Setting bit %d to 1!\n", qword * 64 + shift + count); } val = val >> 1; count++; } shift += 4; } first = strtok_r(NULL, ",", &last); qword++; } free(copy); } static void tobj_preorder_dump(struct topo_obj * root, int indent) { cpuset_t tmpset; CPU_COPY(&root->mask, &tmpset); const char * flag_str; switch(root->flags) { case TOPO_FLAG_NUMA: flag_str = "NUMA"; break; case TOPO_FLAG_THREAD: flag_str = "THREAD"; break; case TOPO_FLAG_HTT: flag_str = "HTT"; break; case TOPO_FLAG_SMT: flag_str = "SMT"; break; default: flag_str = "NONE"; } printf("%*scache-level: %d, flag: %s, cores: ", indent * 4, "", root->cache_level, flag_str); while (CPU_FFS(&tmpset) != 0) { int u = CPU_FFS(&tmpset); printf("%d ", u - 1); CPU_CLR(u -1 , &tmpset); } printf("\n"); for (int i = 0; i < root->num_children; i++) { tobj_preorder_dump(root->children[i], indent + 1); } } static xmlAttr * xml_find_attr(xmlNode * root, const char * attr_name) { xmlAttr * attr = root->properties; while (attr != NULL) { if (strcmp((const char *)attr->name, attr_name) == 0) { break; } attr = attr->next; } return attr; } static void tobj_free_root(struct topo_obj * root) { if (root == NULL) { return; } if (root->num_children > 0) { for (int i = 0; i < root->num_children; i++) { tobj_free_root(root->children[i]); } } free(root); } static int tobj_populate_root(xmlNode * root, struct topo_obj * parent, struct topo_obj ** out) { int rc = 0; if (root->type == XML_ELEMENT_NODE) { if (strcmp((const char*)root->name, "group") == 0) { // this is a group, then check cache-level xmlAttr * attr = xml_find_attr(root, "cache-level"); if (attr == NULL) { fprintf(stderr, "libtopo: could not find attr cache-level in group.\n"); return -1; } xmlAttr * lvl_attr = xml_find_attr(root, "level"); if (lvl_attr == NULL) { fprintf(stderr, "libtopo: could not find attr level in group.\n"); return -1; } int level = atoi((const char *)lvl_attr->children->content); int cache_level = atoi((const char *)attr->children->content); struct topo_obj * tobj = malloc(sizeof(struct topo_obj)); tobj->cache_level = cache_level; tobj->num_children = 0; tobj->parent = parent; tobj->flags = TOPO_FLAG_NONE; CPU_ZERO(&tobj->mask); if (level == 1) { if (*out != NULL) { fprintf(stderr, "libtopo: multiple level 1 group detected.\n"); free(tobj); return -1; } *out = tobj; } else { if (parent == NULL) { fprintf(stderr, "libtopo: level 1 group does not appear to be the outermost\n"); free(tobj); return -1; } parent->children[parent->num_children] = tobj; parent->num_children++; } parent = tobj; } else if (strcmp((const char*)root->name, "cpu") == 0) { xmlAttr * attr = xml_find_attr(root, "mask"); if (attr == NULL) { fprintf(stderr, "libtopo: could not find attr mask in cpu.\n"); return -1; } mask_to_cpuset((const char *)attr->children->content, &parent->mask); } else if (strcmp((const char *)root->name, "flag") == 0) { xmlAttr * attr = xml_find_attr(root, "name"); if (attr == NULL) { fprintf(stderr, "libtopo: could not find attr name in flag.\n"); return -1; } if (strcmp((const char *)attr->children->content, "THREAD") == 0) { parent->flags |= TOPO_FLAG_THREAD; } else if (strcmp((const char *)attr->children->content, "SMT") == 0) { parent->flags |= TOPO_FLAG_SMT; } else if (strcmp((const char *)attr->children->content, "HTT") == 0) { parent->flags |= TOPO_FLAG_HTT; } else if (strcmp((const char *)attr->children->content, "NODE") == 0) { parent->flags |= TOPO_FLAG_NUMA; } } xmlNode * child = root->children; while (child != NULL) { if ((rc = tobj_populate_root(child, parent, out)) != 0) { break; } child = child->next; } } return rc; } static int tobj_populate(struct topo_obj **out) { int rc; size_t sz; LIBXML_TEST_VERSION; rc = sysctlbyname("kern.sched.topology_spec", NULL, &sz, NULL, 0); if (rc != 0) { return rc; } char * buf = malloc(sz); if (buf == NULL) { errno = ENOMEM; return -1; } rc = sysctlbyname("kern.sched.topology_spec", buf, &sz, NULL, 0); if (rc != 0) { return rc; } //printf("xml:\n%s",buf); xmlDoc * doc = xmlReadMemory(buf, sz, NULL, NULL, 0); if (doc == NULL) { errno = EINVAL; return -1; } rc = tobj_populate_root(xmlDocGetRootElement(doc), NULL, out) != 0; free(buf); xmlFreeDoc(doc); if (rc != 0) { tobj_free_root(*out); errno = EINVAL; return rc; } return rc; } static struct topo_obj * tobj_find_node_by_mask(struct topo_obj * root, cpuset_t * set) { if (!CPU_SUBSET(&root->mask, set)) { return NULL; } struct topo_obj * best_fit; for (int i = 0; i < root->num_children; i++) { best_fit = tobj_find_node_by_mask(root->children[i], set); if (best_fit != NULL) { break; } } return best_fit == NULL ? root : best_fit; } static struct topo_obj * tobj_find_node_by_flag(struct topo_obj * root, int flag) { if (root->flags & flag) { return root; } struct topo_obj * result; for (int i = 0; i < root->num_children; i++) { result = tobj_find_node_by_flag(root->children[i], flag); if (result != NULL) { return result; } } return NULL; } static int tobj_core_to_numa(struct topo_obj * root, int coreid) { cpuset_t set; CPU_ZERO(&set); CPU_SET(coreid, &set); struct topo_obj * obj = tobj_find_node_by_mask(root, &set); if (obj == NULL) { return TOPO_INVALID; } while(obj->parent != NULL) { obj = obj->parent; if (obj->flags & TOPO_FLAG_NUMA) { if (obj->parent != NULL) { for (int i = 0; i < obj->parent->num_children; i++) { if (obj->parent->children[i] == obj) { return i; } } } break; } } return 0; } static int tobj_get_num_numa(struct topo_obj * root) { struct topo_obj * result = tobj_find_node_by_flag(root, TOPO_FLAG_NUMA); if (result == NULL) { return 1; } if (result->parent == NULL) { return TOPO_INVALID; } return result->parent->num_children; } static int tobj_get_num_core(struct topo_obj * root) { return CPU_COUNT(&root->mask); } static void desc_destroy(struct topo_desc * desc) { tobj_free_root(desc->root); } static int desc_init(struct topo_desc * desc, int enable_alloc, int verbose) { desc->root = NULL; int rc = tobj_populate(&desc->root); if (rc == 0) { desc->num_core = tobj_get_num_core(desc->root); desc->num_numa = tobj_get_num_numa(desc->root); if (desc->num_core > TOPO_MAX_CHILDREN) { fprintf(stderr, "libtopo: too many cores - %d\n", desc->num_core); rc = ENOMEM; } else { for (int i = 0; i < desc->num_core; i++) { desc->core_to_numa_lookup[i] = tobj_core_to_numa(desc->root, i); } if (verbose) { fprintf(stdout, "libtopo: %d cores, %d numa domains detected.\n", desc->num_core, desc->num_numa); for (int i = 0; i < desc->num_core; i++) { fprintf(stdout, "libtopo: core #%d @ numa domain %d.\n", i, desc->core_to_numa_lookup[i]); } fprintf(stdout, "libtopo: parsed tree:\n"); tobj_preorder_dump(desc->root, 1); } } } else { fprintf(stderr, "libtopo: failed to parse topo: %d\n", errno); } if (rc == 0) { rc = topo_ts_init(desc, verbose); } if (enable_alloc && rc == 0) { rc = topo_alloc_init(desc, verbose); } if (rc != 0) { if (desc->root != NULL) { tobj_free_root(desc->root); } } return rc; } // // public APIs // int topo_num_core() { return g_default_desc.num_core; } int topo_num_numa() { return g_default_desc.num_numa; } int topo_core_to_numa(int coreid) { return g_default_desc.core_to_numa_lookup[coreid]; } void topo_destroy() { desc_destroy(&g_default_desc); } void * topo_malloc(unsigned int node, size_t size) { return topo_desc_malloc(&g_default_desc, node, size); } void topo_free(unsigned int node, void * addr) { return topo_desc_free(&g_default_desc, node, addr); } uint64_t topo_uptime_ns() { return topo_desc_uptime_ns(&g_default_desc); } int topo_init(int enable_alloc, int verbose) { int expected = 0; if (atomic_compare_exchange_strong(&initialized, &expected, 2)) { init_rc = desc_init(&g_default_desc, enable_alloc, verbose); atomic_store(&initialized, 1); } else { while(atomic_load(&initialized) != 1) {} // wait for init if (verbose) { fprintf(stdout, "libtopo: already initialized with rc = %d\n", init_rc); } } return init_rc; }