/* 143 lines, 2.8 KiB, C */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <getopt.h>
#include <unistd.h>

#include <sys/cpuset.h>
#include <pthread.h>
#include <pthread_np.h>

#include <immintrin.h>
#include <x86intrin.h>

#include "nms.h"
#include <topo.h>

/* Size in bytes of the buffer requested from nms_alloc_static() in main(). */
#define BUFFER_SIZE (1 * 1024 * 1024)

/* Capacity of the latency sample table: one slot per timed iteration. */
#define LATENCY_SLOTS 65536

/*
 * Handshake flag shared by the two threads: local_thread() sets it to 1 and
 * spins until remote_thread() resets it to 0.
 */
static _Atomic int flush = 0;

/* Buffer allocated in main(); both threads operate on its first int. */
static int *remote_buffer = NULL;

/* Latency samples (TSC deltas) written by local_thread(), printed by main(). */
static uint64_t latencies[LATENCY_SLOTS] = {0};

/* Number of timed iterations (-t); counted down to zero by local_thread(). */
static int times = 10;

/* Core the measuring thread is pinned to (-l). */
static int local_core = 0;

/* Core the helper thread is pinned to (-r). */
static int remote_core = 1;

/*
 * Selected with -m. Non-zero: the remote thread loads the buffer's first int
 * before each sample; zero: the remote thread flushes that cache line instead.
 */
static int cache_mode = 0;

static void * local_thread(void *)
|
||
|
{
|
||
|
int temp;
|
||
|
unsigned int dummy;
|
||
|
uint64_t start, end, base;
|
||
|
printf("Local thread running...\n");
|
||
|
while(times > 0) {
|
||
|
flush = 1;
|
||
|
while(flush != 0) {
|
||
|
}
|
||
|
|
||
|
_mm_clflush(remote_buffer);
|
||
|
|
||
|
start = __rdtscp(&dummy);
|
||
|
end = __rdtscp(&dummy);
|
||
|
base = end - start;
|
||
|
|
||
|
start = __rdtscp(&dummy);
|
||
|
temp = *remote_buffer;
|
||
|
end = __rdtscp(&dummy);
|
||
|
|
||
|
latencies[times - 1] = end - start - base;
|
||
|
times--;
|
||
|
}
|
||
|
|
||
|
return (void *)(uintptr_t)temp;
|
||
|
}
|
||
|
|
||
|
static void * remote_thread(void *)
|
||
|
{
|
||
|
int temp;
|
||
|
printf("Remote thread running...\n");
|
||
|
while(1) {
|
||
|
while(flush == 0) {
|
||
|
}
|
||
|
if(cache_mode) {
|
||
|
temp = *remote_buffer;
|
||
|
} else {
|
||
|
_mm_clflush(remote_buffer);
|
||
|
}
|
||
|
|
||
|
flush = 0;
|
||
|
}
|
||
|
return (void *)(uintptr_t)temp;
|
||
|
}
|
||
|
|
||
|
int main(int argc, char * argv[])
|
||
|
{
|
||
|
{
|
||
|
int c;
|
||
|
// parse arguments
|
||
|
while ((c = getopt(argc, argv, "l:r:t:m:")) != -1) {
|
||
|
switch (c) {
|
||
|
case 'l':
|
||
|
local_core = atoi(optarg);
|
||
|
break;
|
||
|
case 'r':
|
||
|
remote_core = atoi(optarg);
|
||
|
break;
|
||
|
case 't':
|
||
|
times = atoi(optarg);
|
||
|
break;
|
||
|
case 'm':
|
||
|
cache_mode = atoi(optarg);
|
||
|
break;
|
||
|
default:
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// init topo
|
||
|
if (topo_init(1)) {
|
||
|
fprintf(stderr, "libtopo init failed!\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
// init
|
||
|
if (nms_init(1)) {
|
||
|
fprintf(stderr, "libnms init failed!\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
int remote_numa = topo_core_to_numa(remote_core);
|
||
|
int local_numa = topo_core_to_numa(local_core);
|
||
|
int total = times;
|
||
|
|
||
|
remote_buffer = nms_alloc_static(remote_numa, BUFFER_SIZE);
|
||
|
*remote_buffer = 0xffa5be6c;
|
||
|
|
||
|
pthread_attr_t lattr, rattr;
|
||
|
pthread_t lthread, rthread;
|
||
|
cpuset_t lcpuset, rcpuset;
|
||
|
CPU_ZERO(&lcpuset);
|
||
|
CPU_ZERO(&rcpuset);
|
||
|
|
||
|
CPU_SET(local_core, &lcpuset);
|
||
|
CPU_SET(remote_core, &rcpuset);
|
||
|
|
||
|
pthread_attr_init(&rattr);
|
||
|
pthread_attr_setaffinity_np(&rattr, sizeof(cpuset_t), &rcpuset);
|
||
|
pthread_attr_init(&lattr);
|
||
|
pthread_attr_setaffinity_np(&lattr, sizeof(cpuset_t), &lcpuset);
|
||
|
|
||
|
printf("local thread: %d numa: %d, remote: %d numa: %d\n", local_core, local_numa, remote_core, remote_numa);
|
||
|
pthread_create(<hread, &lattr, local_thread, NULL);
|
||
|
pthread_create(&rthread, &rattr, remote_thread, NULL);
|
||
|
|
||
|
pthread_join(lthread, NULL);
|
||
|
|
||
|
uint64_t sum = 0;
|
||
|
for (int i = total - 1; i >= 0; i--) {
|
||
|
printf("%lu\n", latencies[i]);
|
||
|
sum += latencies[i];
|
||
|
}
|
||
|
printf("Avg: %lu\n", sum / total);
|
||
|
return 0;
|
||
|
}
|
||
|
|