numam/util/mornafah.c

238 lines
4.8 KiB
C
Raw Normal View History

2023-05-01 19:18:34 +00:00
#include <stdio.h>
#include <stdlib.h>
#include "nms.h"
#include <getopt.h>
#include <unistd.h>
#include <topo.h>
#include <immintrin.h>
#include <x86intrin.h>
2023-12-05 19:23:00 +00:00
#include <stdatomic.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <errno.h>
#include <stdint.h>
2023-05-01 19:18:34 +00:00
#include <sys/cpuset.h>
2023-12-05 19:23:00 +00:00
#include <sys/sysctl.h>
2023-05-01 19:18:34 +00:00
#include <pthread.h>
#include <pthread_np.h>
2023-12-05 19:23:00 +00:00
/* Size in bytes of the buffer requested from nms_malloc() in main(). */
#define BUFFER_SIZE (128 * 1024 * 1024)
/* Number of int-sized elements that fit in BUFFER_SIZE bytes. */
#define BUFFER_CNT (BUFFER_SIZE / sizeof(int))
2023-05-01 19:18:34 +00:00
/*
 * Handshake flag between the two threads: local_thread stores 1 to request
 * work and spins until remote_thread stores 0 again.
 */
static _Atomic int flush = 0;
/* Byte offset into remote_buffer of the location currently being exercised. */
static _Atomic uint64_t offset = 0;
/* Test buffer of BUFFER_SIZE bytes, allocated in main() via nms_malloc(). */
static int * remote_buffer = NULL;
/* One measured latency (in TSC cycles) per iteration; allocated in main(). */
static uint64_t * latencies = NULL;
/* Iterations left to run; set with -t and counted down by local_thread. */
static int times = 100;
/* Core the local (measuring) thread is pinned to; set with -l. */
static int local_core = 0;
/* Core the remote (helper) thread is pinned to; set with -r. */
static int remote_core = 1;
/* Non-zero: remote_thread reads the location; zero: it flushes the location. */
static int cache_mode = 0;
/* Verbosity level; each -v increments it. */
static int verbose = 0;
/* Non-zero (-R): local_thread picks a new random offset for every iteration. */
static int random_access = 0;
/* TSC frequency in cycles per second, read from machdep.tsc_freq; 0 if unknown. */
static uint64_t tsc_freq = 0;
/*
 * Convert a TSC cycle count into nanoseconds using the global tsc_freq
 * (cycles per second).
 *
 * Returns 0 when tsc_freq is 0 (frequency unknown): dividing by zero would
 * produce inf/NaN, and converting that to uint64_t is undefined behaviour.
 */
static inline uint64_t cyc2ns(uint64_t cyc)
{
    if (tsc_freq == 0) {
        return 0;
    }
    return (uint64_t)((double)cyc / ((double)tsc_freq / 1000000000.0));
}
/*
 * Sample the timestamp counter with RDTSCP and then issue an LFENCE before
 * returning the counter value. The auxiliary value RDTSCP writes is discarded.
 */
static inline uint64_t read_time(void)
{
    unsigned int aux;
    const uint64_t tsc = __rdtscp(&aux);

    _mm_lfence();
    return tsc;
}
2023-05-01 19:18:34 +00:00
static void * local_thread(void *)
{
2023-12-05 19:23:00 +00:00
int temp, *addr;
uint64_t start, end;
2023-05-01 19:18:34 +00:00
printf("Local thread running...\n");
while(times > 0) {
2023-12-05 19:23:00 +00:00
if (random_access) {
// change offset
offset = (rand() % BUFFER_CNT) * sizeof(int);
}
2023-05-01 19:18:34 +00:00
flush = 1;
while(flush != 0) {
}
2023-12-05 19:23:00 +00:00
addr = (int *)((char *)remote_buffer + offset);
if (verbose > 1) {
printf("Local thread(%d): flushing %p.\n", local_core, addr);
}
_mm_clflushopt(addr);
_mm_mfence();
2023-05-01 19:18:34 +00:00
2023-12-05 19:23:00 +00:00
atomic_signal_fence(memory_order_seq_cst);
start = read_time();
temp = *addr;
end = read_time();
atomic_signal_fence(memory_order_seq_cst);
if (verbose > 1) {
printf("Local thread(%d): read %p.\n", local_core, addr);
}
2023-05-01 19:18:34 +00:00
2023-12-05 19:23:00 +00:00
latencies[times - 1] = end - start;
2023-05-01 19:18:34 +00:00
times--;
}
return (void *)(uintptr_t)temp;
}
static void * remote_thread(void *)
{
int temp;
2023-12-05 19:23:00 +00:00
int * addr;
2023-05-01 19:18:34 +00:00
printf("Remote thread running...\n");
while(1) {
while(flush == 0) {
}
2023-12-05 19:23:00 +00:00
addr = (int *)((char *)remote_buffer + offset);
2023-05-01 19:18:34 +00:00
if(cache_mode) {
2023-12-05 19:23:00 +00:00
temp = *addr;
_mm_mfence();
2023-05-01 19:18:34 +00:00
} else {
2023-12-05 19:23:00 +00:00
_mm_clflushopt(addr);
_mm_mfence();
}
if (verbose > 1) {
printf("Remote thread(%d): %p %s.\n", remote_core, addr, cache_mode ? "read into cache" : "flushed");
2023-05-01 19:18:34 +00:00
}
flush = 0;
}
return (void *)(uintptr_t)temp;
}
int main(int argc, char * argv[])
{
{
int c;
// parse arguments
2023-12-05 19:23:00 +00:00
while ((c = getopt(argc, argv, "l:r:t:vR")) != -1) {
2023-05-01 19:18:34 +00:00
switch (c) {
case 'l':
local_core = atoi(optarg);
break;
case 'r':
remote_core = atoi(optarg);
break;
case 't':
times = atoi(optarg);
break;
2023-12-05 19:23:00 +00:00
case 'R':
random_access = 1;
break;
case 'v':
verbose++;
2023-05-01 19:18:34 +00:00
break;
default:
exit(1);
}
}
}
2023-12-05 19:23:00 +00:00
srand(time(NULL));
2023-05-01 19:18:34 +00:00
// init topo
if (topo_init(1)) {
fprintf(stderr, "libtopo init failed!\n");
exit(1);
}
// init
if (nms_init(1)) {
fprintf(stderr, "libnms init failed!\n");
exit(1);
}
2023-12-05 19:23:00 +00:00
size_t sz = sizeof(tsc_freq);
int rc;
if ((rc = sysctlbyname("machdep.tsc_freq", &tsc_freq, &sz, NULL, 0)) < 0) {
fprintf(stderr,"failed to query tsc frequency via sysctl (%d)\n", errno);
} else {
fprintf(stdout,"system tsc frequency = %lu\n", tsc_freq);
}
2023-05-01 19:18:34 +00:00
2023-12-05 19:23:00 +00:00
latencies = malloc(sizeof(uint64_t) * times);
const int remote_numa = topo_core_to_numa(remote_core);
const int local_numa = topo_core_to_numa(local_core);
const int total = times;
remote_buffer = nms_malloc(remote_numa, BUFFER_SIZE);
// fill with random values
for (int i = 0; i < BUFFER_SIZE; i++) {
remote_buffer[i] = rand();
}
2023-05-01 19:18:34 +00:00
pthread_attr_t lattr, rattr;
pthread_t lthread, rthread;
cpuset_t lcpuset, rcpuset;
CPU_ZERO(&lcpuset);
CPU_ZERO(&rcpuset);
CPU_SET(local_core, &lcpuset);
CPU_SET(remote_core, &rcpuset);
pthread_attr_init(&rattr);
pthread_attr_setaffinity_np(&rattr, sizeof(cpuset_t), &rcpuset);
pthread_attr_init(&lattr);
pthread_attr_setaffinity_np(&lattr, sizeof(cpuset_t), &lcpuset);
printf("local thread: %d numa: %d, remote: %d numa: %d\n", local_core, local_numa, remote_core, remote_numa);
pthread_create(&lthread, &lattr, local_thread, NULL);
pthread_create(&rthread, &rattr, remote_thread, NULL);
pthread_join(lthread, NULL);
2023-12-05 19:23:00 +00:00
uint64_t min = UINT64_MAX;
uint64_t max = 0;
2023-05-01 19:18:34 +00:00
uint64_t sum = 0;
for (int i = total - 1; i >= 0; i--) {
2023-12-05 19:23:00 +00:00
if (verbose) {
printf("%lu,\n", latencies[i]);
}
if (min > latencies[i]) {
min = latencies[i];
}
if (max < latencies[i]) {
max = latencies[i];
}
2023-05-01 19:18:34 +00:00
sum += latencies[i];
}
2023-12-05 19:23:00 +00:00
double var = 0.0;
double avg = (double)sum / (double)total;
for (int i = total - 1; i >= 0; i--) {
var += pow(latencies[i] - avg, 2);
}
var = sqrt(var / avg);
printf("Avg: %lu cycles (%lu ns)\n"
"Std: %lu cycles (%lu ns)\n"
"Min: %lu cycles (%lu ns)\n"
"Max: %lu cycles (%lu ns)\n",
(uint64_t)avg, cyc2ns((uint64_t)avg),
(uint64_t)var, cyc2ns((uint64_t)var),
min, cyc2ns(min),
max, cyc2ns(max));
free(latencies);
2023-05-01 19:18:34 +00:00
return 0;
}