numam/util/mornafah.c

143 lines
2.8 KiB
C
Raw Normal View History

2023-05-01 19:18:34 +00:00
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <getopt.h>
#include <unistd.h>
#include <sys/cpuset.h>
#include <pthread.h>
#include <pthread_np.h>

#include <immintrin.h>
#include <x86intrin.h>

#include "nms.h"
#include <topo.h>
/* Size in bytes of the buffer main() requests from nms_alloc_static(). */
#define BUFFER_SIZE (1 * 1024 * 1024)
/*
 * Handshake flag between the two threads: local_thread() sets it to 1 and
 * spins until it reads 0; remote_thread() spins until it is non-zero, acts on
 * the buffer, then sets it back to 0.
 */
static _Atomic int flush = 0;
/* Buffer allocated in main() on the NUMA node of remote_core; only its first int is accessed. */
static int * remote_buffer = NULL;
/* One latency sample per iteration; local_thread() writes index (times - 1). */
static uint64_t latencies[65536] = {0};
/* Number of iterations (-t); counted down to 0 by local_thread(). */
static int times = 10;
/* Core the measuring thread is pinned to (-l). */
static int local_core = 0;
/* Core the helper thread is pinned to (-r). */
static int remote_core = 1;
/* -m: non-zero makes remote_thread() read the buffer; zero makes it clflush the buffer. */
static int cache_mode = 0;
/*
 * Measurement thread: times one load of *remote_buffer per iteration and
 * records the result in latencies[].
 *
 * Each iteration:
 *   1. sets `flush` to 1 and spins until remote_thread() resets it to 0,
 *   2. executes clflush on the line holding *remote_buffer,
 *   3. times two back-to-back __rdtscp() calls to estimate timer overhead,
 *   4. times a single load of *remote_buffer and stores (load - overhead)
 *      at latencies[times - 1], then decrements `times`.
 *
 * NOTE(review): step 2 runs after the remote thread has acted on the line, so
 * in cache_mode it presumably also evicts the copy the remote thread just
 * loaded - confirm this is the intended experiment.
 *
 * arg is unused. Returns the last value loaded (0 if no iteration ran),
 * converted to a pointer.
 */
static void * local_thread(void *arg)
{
    /* Number of samples latencies[] can hold. */
    const int capacity = (int)(sizeof(latencies) / sizeof(latencies[0]));
    int temp = 0; /* defined result even when times <= 0 on entry */
    unsigned int dummy;
    uint64_t start, end, base, elapsed;

    (void)arg;
    printf("Local thread running...\n");

    while (times > 0) {
        /* Ask the remote thread to act on the line and wait until it is done. */
        flush = 1;
        while (flush != 0) {
        }

        _mm_clflush(remote_buffer);

        /* Cost of an empty rdtscp pair, subtracted from the sample below. */
        start = __rdtscp(&dummy);
        end = __rdtscp(&dummy);
        base = end - start;

        start = __rdtscp(&dummy);
        /* volatile access: keeps the compiler from moving or merging the timed load. */
        temp = *(volatile int *)remote_buffer;
        end = __rdtscp(&dummy);
        elapsed = end - start;

        /* Never write past latencies[]; samples that do not fit are dropped. */
        if (times <= capacity) {
            /* Clamp at 0 so timer jitter cannot wrap the unsigned difference. */
            latencies[times - 1] = elapsed > base ? elapsed - base : 0;
        }
        times--;
    }
    return (void *)(uintptr_t)temp;
}
/*
 * Helper thread: waits for local_thread() to raise `flush`, acts on the
 * cache line holding *remote_buffer, then lowers `flush` again.
 *
 *   cache_mode != 0 : read *remote_buffer from this thread
 *   cache_mode == 0 : execute clflush on *remote_buffer
 *
 * The loop never terminates; the thread ends when the process exits.
 * arg is unused.
 */
static void * remote_thread(void *arg)
{
    (void)arg;
    printf("Remote thread running...\n");

    for (;;) {
        /* Wait for a request from the local thread. */
        while (flush == 0) {
        }

        if (cache_mode) {
            /*
             * volatile access: the loaded value is never used, so a plain
             * read could be removed by the optimiser and the line would
             * never be touched from this thread.
             */
            (void)*(volatile int *)remote_buffer;
        } else {
            _mm_clflush(remote_buffer);
        }

        /* Signal completion. */
        flush = 0;
    }

    /* Not reached. */
    return NULL;
}
int main(int argc, char * argv[])
{
{
int c;
// parse arguments
while ((c = getopt(argc, argv, "l:r:t:m:")) != -1) {
switch (c) {
case 'l':
local_core = atoi(optarg);
break;
case 'r':
remote_core = atoi(optarg);
break;
case 't':
times = atoi(optarg);
break;
case 'm':
cache_mode = atoi(optarg);
break;
default:
exit(1);
}
}
}
// init topo
if (topo_init(1)) {
fprintf(stderr, "libtopo init failed!\n");
exit(1);
}
// init
if (nms_init(1)) {
fprintf(stderr, "libnms init failed!\n");
exit(1);
}
int remote_numa = topo_core_to_numa(remote_core);
int local_numa = topo_core_to_numa(local_core);
int total = times;
remote_buffer = nms_alloc_static(remote_numa, BUFFER_SIZE);
*remote_buffer = 0xffa5be6c;
pthread_attr_t lattr, rattr;
pthread_t lthread, rthread;
cpuset_t lcpuset, rcpuset;
CPU_ZERO(&lcpuset);
CPU_ZERO(&rcpuset);
CPU_SET(local_core, &lcpuset);
CPU_SET(remote_core, &rcpuset);
pthread_attr_init(&rattr);
pthread_attr_setaffinity_np(&rattr, sizeof(cpuset_t), &rcpuset);
pthread_attr_init(&lattr);
pthread_attr_setaffinity_np(&lattr, sizeof(cpuset_t), &lcpuset);
printf("local thread: %d numa: %d, remote: %d numa: %d\n", local_core, local_numa, remote_core, remote_numa);
pthread_create(&lthread, &lattr, local_thread, NULL);
pthread_create(&rthread, &rattr, remote_thread, NULL);
pthread_join(lthread, NULL);
uint64_t sum = 0;
for (int i = total - 1; i >= 0; i--) {
printf("%lu\n", latencies[i]);
sum += latencies[i];
}
printf("Avg: %lu\n", sum / total);
return 0;
}