341 lines
11 KiB
Python
341 lines
11 KiB
Python
import time
|
|
import subprocess as sp
|
|
import os
|
|
|
|
import libpar as par
|
|
import libtc as tc
|
|
import libmechspec as mechspec
|
|
|
|
class NetExpResult:
    """Container for the artifacts collected after one experiment run.

    All fields start as ``None`` and are filled in by ``__keep_result``.
    """

    def __init__(self):
        # parser:     par.khat_parser() instance fed with the sample text
        # pmc_parser: par.pmc_parser(...) object, or a [raw, processed] list
        #             when PMC sampling mode (pmc_mode == 0) was used
        # sample:     raw contents of the sample file fetched from the master
        self.parser = self.pmc_parser = self.sample = None
|
|
|
|
|
|
class NetExpConf:
    """Configuration for one network experiment.

    The attributes are read by ``setup``, ``run``, ``stop_all`` and
    ``__keep_result`` to build the remote command lines.

    ``finalize_mechspecs()`` must be called after the ``*_mechspec(s)``
    attributes have been assigned: it creates ``clt_fqdns``, ``srv_fqdns``
    and ``mst_fqdns``, which do not exist on the object before that call.
    """

    def __init__(self):
        # directory on the remote hosts that holds the binaries and output files
        self.root_dir = ""

        # when True the server binary is neither started nor killed
        self.enable_client_only = False
        # when True memloadgen is started on the server host(s)
        self.enable_memgen = False

        # memloadgen arguments: -s, -i, -b and -d respectively
        self.memgen_affinity = ""
        self.memgen_iteration = -1
        self.memgen_size = 512 * 1024 * 1024
        self.memgen_tgtdom = 1

        # server (khat) arguments: -A, -H (srv_mechspec.netspec), -p
        self.srv_affinity = ""
        self.srv_mechspec = None
        self.srv_port = 0

        # client (rat) arguments; clt_qps is the total rate, see calc_client_qps()
        self.clt_qps = 0
        self.clt_mechspecs = []
        self.clt_affinity = "1"
        self.clt_wrkld = 0
        self.clt_wrkarg0 = "fixed:0"
        self.clt_wrkarg1 = "fixed:0"
        self.clt_pkt_loss_lat = 1000
        self.clt_rage_quit_lat = 1000
        self.clt_port = 0
        self.clt_pkt_pad = 0
        self.clt_pkt_depth = 1
        self.clt_ia = "exponential"

        # master (cat) arguments
        self.mst_mechspec = None
        self.mst_affinity = "2"
        self.mst_qps = 100
        self.mst_port = 0
        self.mst_pkt_loss_lat = 1000
        self.mst_pkt_loss_max = 1000
        self.mst_duration = 10
        self.mst_warmup = 5
        self.mst_ia = "exponential"

        # pmcstat settings for the server host(s)
        self.enable_pmc = False
        self.pmc_counters = []
        self.pmc_mode = 0 # 0 = sampling
        self.pmc_sampling_rate = 8192
        self.pmc_counting_interval = 0.1

    def __build_fqdn_arr(self, ns):
        """Return the ``fqdn`` of every entry in *ns* that is not ``None``."""
        return [n.fqdn for n in ns if n is not None]

    def get_pmc_str(self):
        """Return ``pmc_counters`` as one comma-separated string ("" if empty)."""
        return ",".join(self.pmc_counters)

    def calc_client_qps(self):
        """Return the request rate assigned to each client.

        Returns 0 when ``clt_qps`` is 0. Otherwise the master's share
        (``mst_qps``) is subtracted from ``clt_qps`` and the remainder is
        divided evenly among ``clt_mechspecs``, truncated to an int.

        Raises:
            ZeroDivisionError: ``clt_qps`` is non-zero but ``clt_mechspecs``
                is empty.
        """
        if self.clt_qps == 0:
            return 0
        return int((self.clt_qps - self.mst_qps) / len(self.clt_mechspecs))

    def finalize_mechspecs(self):
        """Derive the ``*_fqdns`` host lists from the configured mechspecs.

        A ``None`` server or master mechspec yields an empty list.
        """
        self.clt_fqdns = self.__build_fqdn_arr(self.clt_mechspecs)
        self.srv_fqdns = self.__build_fqdn_arr([self.srv_mechspec])
        self.mst_fqdns = self.__build_fqdn_arr([self.mst_mechspec])
|
|
|
|
# Name of the sample file the master (cat) writes under conf.root_dir via -o;
# __keep_result copies it to tc.get_odir() and deletes the local copy after parsing.
__SAMPLE_FN = "sample.txt.tmp"
|
|
# Name of the pmcstat output file written under conf.root_dir on the server;
# the post-processed variant used in sampling mode carries an extra ".proc" suffix.
__PMC_FN = "pmc.txt.tmp"
|
|
|
|
def __keep_result(conf : NetExpConf):
    """Fetch the output files of a finished run and return a NetExpResult.

    The sample file is copied from the master with scp, parsed with
    ``par.khat_parser`` and the local copy removed. When ``conf.enable_pmc``
    is set, the PMC output is fetched from the server as well:

    * ``pmc_mode != 0``: the text file is wrapped in ``par.pmc_parser``.
    * ``pmc_mode == 0``: the log is first post-processed on the server with
      ``pmcstat -R ... -m ...``; ``result.pmc_parser`` then becomes the list
      ``[raw_bytes, processed_text]``.

    Raises:
        subprocess.CalledProcessError: a local scp or rm command failed.
    """
    def _run_local(cmd):
        # log the command, then run it through the local shell
        tc.log_print(cmd)
        sp.check_call(cmd, shell=True)

    def _scp_cmd(host, remote_name, local_path):
        # scp command line copying <root_dir>/<remote_name> from host to local_path
        return "scp -P77 " + tc.get_ssh_user() + "@" + host + ":" + conf.root_dir + "/" + remote_name + " " + local_path

    result = NetExpResult()

    # --- latency sample from the master ---
    sample_path = tc.get_odir() + "/" + __SAMPLE_FN
    _run_local(_scp_cmd(conf.mst_mechspec.fqdn, __SAMPLE_FN, sample_path))

    result.parser = par.khat_parser()
    with open(sample_path, "r") as sample_file:
        result.sample = sample_file.read()
    result.parser.parse(result.sample)

    _run_local("rm " + sample_path)

    if not conf.enable_pmc:
        return result

    # --- PMC output from the server ---
    pmc_path = tc.get_odir() + "/" + __PMC_FN
    _run_local(_scp_cmd(conf.srv_mechspec.fqdn, __PMC_FN, pmc_path))

    if conf.pmc_mode != 0:
        with open(pmc_path, "r") as pmc_file:
            result.pmc_parser = par.pmc_parser(pmc_file.read())
    else:
        # sampling mode: post-process the log remotely, then fetch the result too
        remote_pmc = conf.root_dir + "/" + __PMC_FN
        proc_cmd = "sudo pmcstat -R " + remote_pmc + " -m " + remote_pmc + ".proc"
        tc.log_print(proc_cmd)
        tc.remote_exec(conf.srv_fqdns, proc_cmd)

        proc_path = pmc_path + ".proc"
        _run_local(_scp_cmd(conf.srv_mechspec.fqdn, __PMC_FN + ".proc", proc_path))

        with open(pmc_path, "rb") as raw_file:
            with open(proc_path, "r") as proc_file:
                result.pmc_parser = [raw_file.read(), proc_file.read()]

        _run_local("rm " + proc_path)

    _run_local("rm " + pmc_path)

    return result
|
|
|
|
def stop_all(conf : NetExpConf):
    """Kill every benchmark process on the hosts named in *conf*.

    Clients and master are always handled; the server only when
    ``conf.enable_client_only`` is false. With ``conf.enable_pmc`` set,
    pmcstat is killed on the server host(s) as well. All remote commands run
    with ``check=False``, so a host with nothing to kill is not an error.
    """
    kill_cmd = "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen"

    def _kill(role, hosts):
        # announce, then kill all benchmark binaries on the given hosts
        tc.log_print(f"Stopping {role}...")
        tc.remote_exec(hosts, kill_cmd, check=False)

    _kill("clients", conf.clt_fqdns)
    _kill("master", conf.mst_fqdns)

    if not conf.enable_client_only:
        _kill("server", conf.srv_fqdns)

    if conf.enable_pmc:
        tc.log_print("Stopping server PMC...")
        tc.remote_exec(conf.srv_fqdns, "sudo killall -9 pmcstat", check=False)
|
|
|
|
|
|
def __run_setup_cmd(conf : NetExpConf, cmd : str, desc : str):
    """Run *cmd* on every server, client and master host in parallel.

    The command is launched non-blocking on each host first; afterwards every
    process is waited for in launch order and a success or failure line
    (including the decoded stderr on failure) is printed per host. A failing
    host does not raise.

    Args:
        conf: experiment configuration providing the ``*_fqdns`` host lists.
        cmd: shell command to execute remotely.
        desc: short label used in the progress messages.
    """
    hosts = [*conf.srv_fqdns, *conf.clt_fqdns, *conf.mst_fqdns]

    # phase 1: start the command everywhere without waiting
    launched : list[tuple[str, sp.Popen]] = []
    for host in hosts:
        tc.log_print(f"Running '{desc}' on {host}...")
        proc = tc.remote_exec([host], cmd, blocking=False, check=False)[0]
        launched.append((host, proc))

    # phase 2: collect the outcome of each host
    for host, proc in launched:
        _, err = proc.communicate()
        if proc.returncode == 0:
            print(f"{host} '{desc}' succeeded")
        else:
            print(f"{host} '{desc}' failed. stderr:\n{err.decode()}\n")
|
|
|
|
def setup(conf : NetExpConf, bench : bool = False, dpdk : bool = False):
    """Install dependencies and/or deploy the benchmark on all hosts.

    Args:
        conf: experiment configuration providing the ``*_fqdns`` host lists.
        bench: when true, wipe ``/numam.d`` on every host, rsync the parent
            directory of this file's directory into it and build the
            ``khat``, ``cat``, ``rat`` and ``memloadgen`` targets.
        dpdk: when true, rebuild and install libtopo and the numam-dpdk
            ``migration`` branch on every host.

    Raises:
        subprocess.CalledProcessError: a local rsync invocation failed.
            Failures of the remote setup commands are only printed by
            ``__run_setup_cmd``.
    """
    libtopo_path = "/libtopo"
    dpdk_path = "/dpdk"
    bench_path = "/numam.d"

    def _script(*lines):
        # one shell command per line, sent to the remote host as a single string
        return "\n".join(lines)

    if dpdk:
        setup_cmd = _script(
            f"sudo rm -rf {libtopo_path}; sudo rm -rf /usr/local/include/libtopo;",
            "sudo rm -rf /usr/local/lib/libtopo;",
            f"sudo mkdir -p {libtopo_path};",
            f"sudo chmod 777 {libtopo_path};",
            f"cd {libtopo_path};",
            "git clone https://git.quacker.org/d/libtopo;",
            "cd libtopo;",
            "mkdir build;",
            "cd build;",
            "cmake ../;",
            "sudo make install",
        )
        __run_setup_cmd(conf, setup_cmd, "dpdk - libtopo")

        setup_cmd = _script(
            "sudo pkg install -y meson pkgconf py39-pyelftools;",
            # the ';' was missing here; added so the line cannot merge with the
            # following mkdir if the transport ever collapses newlines
            f"sudo rm -rf {dpdk_path};",
            f"sudo mkdir -p {dpdk_path};",
            f"sudo chmod 777 {dpdk_path};",
            f"cd {dpdk_path};",
            "git clone https://git.quacker.org/d/numam-dpdk;",
            "cd numam-dpdk;",
            "git checkout migration;",
            "CC=gcc CXX=g++ meson -Denable_kmods=true build;",
            "cd build;",
            "sudo ninja install",
        )
        __run_setup_cmd(conf, setup_cmd, "dpdk - dpdk")

    if bench:
        setup_cmd = _script(
            f"sudo rm -rf {bench_path};",
            f"sudo mkdir -p {bench_path};",
            f"sudo chmod 777 {bench_path}",
        )
        __run_setup_cmd(conf, setup_cmd, "bench - remove")

        hosts = [*conf.srv_fqdns, *conf.clt_fqdns, *conf.mst_fqdns]
        # parent of the directory containing this file
        src_dir = f"{os.path.dirname(__file__)}/../"
        for host in hosts:
            print("Syncing files to " + host + "...")
            rsync_cmd = f"rsync -az --no-perms --rsync-path=\"sudo rsync\" --omit-dir-times -e \"ssh -p77\" {src_dir} {tc.get_ssh_user()}@{host}:{bench_path}/"
            sp.check_call(rsync_cmd, shell=True)

        setup_cmd = _script(
            f"cd {bench_path};",
            "sudo rm -rf build;",
            "mkdir build;",
            "cd build;",
            "cmake ../;",
            "make -j8 khat cat rat memloadgen",
        )
        __run_setup_cmd(conf, setup_cmd, "bench - compile")
|
|
|
|
def run(conf : NetExpConf):
    """Run the experiment described by *conf* and return its result.

    Each attempt starts (optionally) pmcstat, the server (khat), memloadgen,
    all clients (rat) and finally the master (cat), then polls once per
    second until one of the following happens:

    * the master process exits -> the attempt counts as successful;
    * ``tc.errthr_get_failed()`` reports a failure;
    * ``3 * (mst_warmup + mst_duration)`` polls have elapsed.

    After every attempt all remote processes are killed. A failed attempt is
    retried from scratch, with no upper bound on the number of retries.

    Returns:
        The ``NetExpResult`` produced by ``__keep_result`` for the first
        successful attempt.
    """
    stop_all(conf)
    while True:
        server_cmd = "sudo "
        if conf.enable_pmc:
            pmc_out = conf.root_dir + "/" + __PMC_FN
            if conf.pmc_mode != 0:
                # counting mode
                pmc_cmd = ("sudo pmcstat -C -w " + str(conf.pmc_counting_interval) +
                           " -s " + conf.get_pmc_str() + " -o " + pmc_out)
            else:
                # sampling mode
                pmc_cmd = ("sudo pmcstat -n " + str(conf.pmc_sampling_rate) +
                           " -S " + conf.get_pmc_str() + " -O " + pmc_out)
            tc.log_print("Starting server PMC...")
            tc.log_print(pmc_cmd)
            spmc = tc.remote_exec(conf.srv_fqdns, pmc_cmd, blocking=False)

        server_cmd += (conf.root_dir + "/khat --log-level lib.eal:err -- -A " + conf.srv_affinity +
                       " -H " + conf.srv_mechspec.netspec + " -p " + str(conf.srv_port))
        # NOTE(review): 1518 is presumably the standard maximum Ethernet frame
        # size and -J a jumbo-frame switch - confirm against the binaries.
        needs_jumbo = int(conf.clt_pkt_pad) > 1518
        if needs_jumbo:
            server_cmd += " -J "

        if conf.enable_client_only:
            # the server is not launched; the command is logged for reference only
            ssrv = None
            tc.log_print(server_cmd)
        else:
            # start server
            tc.log_print("Starting server...")
            tc.log_print(server_cmd)
            ssrv = tc.remote_exec(conf.srv_fqdns, server_cmd, blocking=False)

        if conf.enable_memgen:
            memgen_cmd = ("sudo " + conf.root_dir + "/memloadgen -b " + str(conf.memgen_size) +
                          " -s " + conf.memgen_affinity +
                          " -i " + str(conf.memgen_iteration) +
                          " -d " + str(conf.memgen_tgtdom))
            tc.log_print("Starting memloadgen...")
            tc.log_print(memgen_cmd)
            smem = tc.remote_exec(conf.srv_fqdns, memgen_cmd, blocking=False)

        # start clients
        tc.log_print("Starting clients...")
        sclt = []
        sclt_name = []
        for i, clt_fqdn in enumerate(conf.clt_fqdns):
            client_cmd = ("sudo " + conf.root_dir + "/rat --log-level lib.eal:err -- -S -A " + conf.clt_affinity +
                          " -i " + conf.clt_ia +
                          " -q " + str(conf.calc_client_qps()) +
                          " -H " + conf.clt_mechspecs[i].netspec +
                          " -s " + conf.srv_mechspec.netspec +
                          " -r " + str(conf.clt_rage_quit_lat) +
                          " -l " + str(conf.clt_pkt_loss_lat) +
                          " -w " + str(conf.clt_wrkld) +
                          " -w " + str(conf.clt_wrkarg0) +
                          " -w " + str(conf.clt_wrkarg1) +
                          " -P " + str(conf.clt_pkt_pad) +
                          " -D " + str(conf.clt_pkt_depth) +
                          " -p " + str(conf.clt_port))
            if needs_jumbo:
                client_cmd += " -J "
            tc.log_print(client_cmd)
            sclt.append(tc.remote_exec([clt_fqdn], client_cmd, blocking=False)[0])
            sclt_name.append(clt_fqdn)

        # give the clients time to come up before the master starts
        time.sleep(5)

        # start master
        tc.log_print("Starting master...")
        master_cmd = ("sudo " + conf.root_dir + "/cat --log-level lib.eal:err -- " +
                      " -s " + conf.srv_mechspec.netspec +
                      " -o " + conf.root_dir + "/" + __SAMPLE_FN +
                      " -t " + str(conf.mst_duration) +
                      " -T " + str(conf.mst_warmup) +
                      " -i " + conf.mst_ia +
                      " -q " + str(conf.mst_qps) +
                      " -l " + str(conf.mst_pkt_loss_lat) +
                      " -L " + str(conf.mst_pkt_loss_max) +
                      " -A " + conf.mst_affinity +
                      " -H " + conf.mst_mechspec.netspec +
                      " -p " + str(conf.mst_port))
        for clt in conf.clt_mechspecs:
            master_cmd += " -S " + clt.netspec
        tc.log_print(master_cmd)
        # named smst (not sp) so the module-level subprocess alias is not shadowed
        smst = tc.remote_exec(conf.mst_fqdns, master_cmd, blocking=False)
        master = smst[0]

        # launch stderr monitoring thread
        exclude = ["Pseudo-terminal", "ice_", "i40e_"]
        tc.errthr_create([master], conf.mst_fqdns, exclude)
        if not conf.enable_client_only:
            tc.errthr_create(ssrv, conf.srv_fqdns, exclude)
        tc.errthr_create(sclt, sclt_name, exclude)
        if conf.enable_memgen:
            tc.errthr_create(smem, ["memloadgen"], exclude)
        if conf.enable_pmc:
            tc.errthr_create(spmc, ["pmcstat"], exclude)
        tc.errthr_start()

        success = False
        elapsed = 0
        # allow three times the nominal run length before giving up
        timeout = (conf.mst_warmup + conf.mst_duration) * 3
        while True:
            # either failed or timeout
            # we use failure detection to save time for long durations
            if tc.errthr_get_failed() or elapsed >= timeout:
                break

            # NOTE(review): any exit of the master counts as success; its
            # return code is not inspected.
            if master.poll() is not None:
                success = True
                break

            time.sleep(1)
            elapsed += 1

        stop_all(conf)
        tc.errthr_stop()
        tc.log_print("Cooling down...")
        time.sleep(5)

        if success:
            return __keep_result(conf)
|