Compare commits

..

10 Commits
master ... dev

Author SHA1 Message Date
a1f69bb3f8
change run.py defaults 2021-02-02 00:46:18 -05:00
7b4fc6f3ab
+histogram 2021-02-02 00:43:45 -05:00
1ec01d6c37
fix redundant global 2021-02-01 23:49:30 -05:00
ff4946a699
run script and robust stderr detection 2021-02-01 23:48:14 -05:00
f2be62a9be
cat refactor + rat reborn + statskeeping 2021-01-31 02:50:58 -05:00
226449100d
NUMA detection & server multicore 2021-01-28 05:24:59 -05:00
82e1098f3b ptp working
Summary:
+arc

stuff

khat timestamp protocol working

Test Plan: by hand

Reviewers: ali

Differential Revision: https://review.rcs.uwaterloo.ca/D408
2021-01-27 03:58:12 -05:00
855b9cf714 stuff 2021-01-25 16:30:22 -05:00
73c70a5c52
+arc 2021-01-18 14:06:10 -05:00
Oscar Zhao
0500dc1c21 ptp working 2021-01-18 12:19:13 -05:00
62 changed files with 3117 additions and 8505 deletions

View File

@@ -1,198 +0,0 @@
# $FreeBSD$
# Basic .clang-format
---
BasedOnStyle: WebKit
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: false
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: InlineOnly
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
BinPackArguments: true
BinPackParameters: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: WebKit
BreakBeforeTernaryOperators: false
# TODO: BreakStringLiterals can cause very strange formatting so turn it off?
BreakStringLiterals: false
# Prefer:
# some_var = function(arg1,
# arg2)
# over:
# some_var =
# function(arg1, arg2)
PenaltyBreakAssignment: 100
# Prefer:
# some_long_function(arg1, arg2
# arg3)
# over:
# some_long_function(
# arg1, arg2, arg3)
PenaltyBreakBeforeFirstCallParameter: 100
CompactNamespaces: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros:
- ARB_ARRFOREACH
- ARB_ARRFOREACH_REVWCOND
- ARB_ARRFOREACH_REVERSE
- ARB_FOREACH
- ARB_FOREACH_FROM
- ARB_FOREACH_SAFE
- ARB_FOREACH_REVERSE
- ARB_FOREACH_REVERSE_FROM
- ARB_FOREACH_REVERSE_SAFE
- BIT_FOREACH_ISCLR
- BIT_FOREACH_ISSET
- CPU_FOREACH
- CPU_FOREACH_ISCLR
- CPU_FOREACH_ISSET
- FOREACH_THREAD_IN_PROC
- FOREACH_PROC_IN_SYSTEM
- FOREACH_PRISON_CHILD
- FOREACH_PRISON_DESCENDANT
- FOREACH_PRISON_DESCENDANT_LOCKED
- FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL
- MNT_VNODE_FOREACH_ALL
- MNT_VNODE_FOREACH_ACTIVE
- RB_FOREACH
- RB_FOREACH_FROM
- RB_FOREACH_SAFE
- RB_FOREACH_REVERSE
- RB_FOREACH_REVERSE_FROM
- RB_FOREACH_REVERSE_SAFE
- SLIST_FOREACH
- SLIST_FOREACH_FROM
- SLIST_FOREACH_FROM_SAFE
- SLIST_FOREACH_SAFE
- SLIST_FOREACH_PREVPTR
- SPLAY_FOREACH
- LIST_FOREACH
- LIST_FOREACH_FROM
- LIST_FOREACH_FROM_SAFE
- LIST_FOREACH_SAFE
- STAILQ_FOREACH
- STAILQ_FOREACH_FROM
- STAILQ_FOREACH_FROM_SAFE
- STAILQ_FOREACH_SAFE
- TAILQ_FOREACH
- TAILQ_FOREACH_FROM
- TAILQ_FOREACH_FROM_SAFE
- TAILQ_FOREACH_REVERSE
- TAILQ_FOREACH_REVERSE_FROM
- TAILQ_FOREACH_REVERSE_FROM_SAFE
- TAILQ_FOREACH_REVERSE_SAFE
- TAILQ_FOREACH_SAFE
- VM_MAP_ENTRY_FOREACH
- VM_PAGE_DUMP_FOREACH
IndentCaseLabels: false
IndentPPDirectives: None
Language: Cpp
NamespaceIndentation: None
PointerAlignment: Right
ContinuationIndentWidth: 4
IndentWidth: 8
TabWidth: 8
ColumnLimit: 80
UseTab: Always
SpaceAfterCStyleCast: false
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^\"opt_.*\.h\"'
Priority: 1
SortPriority: 10
- Regex: '^<sys/cdefs\.h>'
Priority: 2
SortPriority: 20
- Regex: '^<sys/types\.h>'
Priority: 2
SortPriority: 21
- Regex: '^<sys/param\.h>'
Priority: 2
SortPriority: 22
- Regex: '^<sys/systm\.h>'
Priority: 2
SortPriority: 23
- Regex: '^<sys.*/'
Priority: 2
SortPriority: 24
- Regex: '^<vm/vm\.h>'
Priority: 3
SortPriority: 30
- Regex: '^<vm/'
Priority: 3
SortPriority: 31
- Regex: '^<machine/'
Priority: 4
SortPriority: 40
- Regex: '^<(x86|amd64|i386|xen)/'
Priority: 5
SortPriority: 50
- Regex: '^<dev/'
Priority: 6
SortPriority: 60
- Regex: '^<net.*/'
Priority: 7
SortPriority: 70
- Regex: '^<protocols/'
Priority: 7
SortPriority: 71
- Regex: '^<(fs|nfs(|client|server)|ufs)/'
Priority: 8
SortPriority: 80
- Regex: '^<[^/].*\.h'
Priority: 9
SortPriority: 90
- Regex: '^\".*\.h\"'
Priority: 10
SortPriority: 100
# LLVM's header include ordering style is almost the exact opposite of ours.
# Unfortunately, they have hard-coded their preferences into clang-format.
# Clobbering this regular expression to avoid matching prevents non-system
# headers from being forcibly moved to the top of the include list.
# http://llvm.org/docs/CodingStandards.html#include-style
IncludeIsMainRegex: 'BLAH_DONT_MATCH_ANYTHING'
SortIncludes: true
KeepEmptyLinesAtTheStartOfBlocks: true
TypenameMacros:
- ARB_ELMTYPE
- ARB_HEAD
- ARB8_HEAD
- ARB16_HEAD
- ARB32_HEAD
- ARB_ENTRY
- ARB8_ENTRY
- ARB16_ENTRY
- ARB32_ENTRY
- LIST_CLASS_ENTRY
- LIST_CLASS_HEAD
- LIST_ENTRY
- LIST_HEAD
- QUEUE_TYPEOF
- RB_ENTRY
- RB_HEAD
- SLIST_CLASS_HEAD
- SLIST_CLASS_ENTRY
- SLIST_HEAD
- SLIST_ENTRY
- SMR_POINTER
- SPLAY_ENTRY
- SPLAY_HEAD
- STAILQ_CLASS_ENTRY
- STAILQ_CLASS_HEAD
- STAILQ_ENTRY
- STAILQ_HEAD
- TAILQ_CLASS_ENTRY
- TAILQ_CLASS_HEAD
- TAILQ_ENTRY
- TAILQ_HEAD

View File

@@ -1 +0,0 @@
Checks: "-*,clang-diagnostic-*,clang-analyzer-*,modernize*,performance*,-modernize-use-trailing-return-type,-modernize-avoid-c-arrays"

5
.gitignore vendored
View File

@@ -268,7 +268,4 @@ cython_debug/
# Executables
*.exe
*.out
*.app
*.clangd
compile_commands.json
*.app

0
.gitmodules vendored
View File

1
.pyenv Normal file
View File

@@ -0,0 +1 @@
PYTHONPATH="./scripts/libs"

View File

@@ -1,86 +1,58 @@
cmake_minimum_required(VERSION 3.0)
find_program(CC_GCC gcc)
find_program(CXX_GCC g++)
set(CMAKE_C_COMPILER ${CC_GCC})
set(CMAKE_CXX_COMPILER ${CXX_GCC})
project(khat)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")
find_package(PkgConfig REQUIRED)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY bin)
pkg_check_modules(DPDK libdpdk)
pkg_check_modules(SPDK spdk_event_bdev spdk_env_dpdk)
pkg_check_modules(SPDK_SYS spdk_syslibs)
pkg_check_modules(UUID uuid)
pkg_check_modules(TOPO bsdtopo)
find_package(dpdk REQUIRED)
find_package(Hwloc REQUIRED)
set(CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11
-Wno-deprecated-declarations
-Wno-packed-not-aligned
-Wno-address-of-packed-member
-Wno-zero-length-array
-Wno-gnu-zero-variadic-macro-arguments
-march=native)
set(C_FLAGS -O2 -g -Wall -Wextra -Werror -std=c2x
-Wno-deprecated-declarations
-Wno-address-of-packed-member
-Wno-zero-length-array
-Wno-gnu-zero-variadic-macro-arguments
-march=native)
-msse4
-mavx)
include_directories(${CMAKE_SOURCE_DIR}/inc)
include_directories()
include_directories(${dpdk_INCLUDE_DIRS})
include_directories(${Hwloc_INCLUDE_DIRS})
set(LIBNTR_C_FLAGS -O3 -g -Wall -Wextra -Werror -std=c2x)
set(LIBGEN_CC_FLAGS -O3 -g -Wall -Wextra -Werror -std=c++17)
set(LIBNM_CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11)
set(LIBNTR_C_FLAGS -O2 -g -Wall -Wextra -Werror -std=c11)
set(LIBGEN_CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11)
add_library(ntr SHARED libntr/ntr.c)
set(KHAT_LINKLIBS pthread nm ntr)
set(CAT_LINKLIBS pthread nm ntr gen)
set(RAT_LINKLIBS pthread nm ntr gen)
add_library(nm libnm/nm.cc)
target_link_libraries(nm ${Hwloc_LIBRARIES})
target_compile_options(nm PRIVATE ${LIBNM_CC_FLAGS})
add_library(ntr libntr/ntr.c)
target_compile_options(ntr PRIVATE ${LIBNTR_C_FLAGS})
add_library(gen SHARED libgen/generator.cc libgen/loadgen.cc)
target_link_libraries(gen PRIVATE pthread ntr ${TOPO_LINK_LIBRARIES} nms)
target_compile_options(gen PRIVATE ${LIBGEN_CC_FLAGS} ${TOPO_CFLAGS})
add_library(gen libgen/generator.cc)
target_link_libraries(gen ${Hwloc_LIBRARIES})
target_compile_options(gen PRIVATE ${LIBGEN_CC_FLAGS})
add_library(netsup SHARED net/libnetsup/dpdk.cc net/libnetsup/portconf.cc)
target_link_libraries(netsup PRIVATE ntr ${DPDK_LINK_LIBRARIES})
target_compile_options(netsup PRIVATE ${LIBGEN_CC_FLAGS} ${DPDK_CFLAGS})
add_executable(khat khat/khat.cc)
target_link_libraries(khat ${dpdk_LIBRARIES} ${KHAT_LINKLIBS})
target_compile_options(khat PRIVATE ${CC_FLAGS})
add_library(nms SHARED libnms/alloc.c)
target_link_libraries(nms PRIVATE ${TOPO_LINK_LIBRARIES})
target_compile_options(nms PRIVATE ${TOPO_CFLAGS})
add_executable(cat cat/cat.cc)
target_link_libraries(cat ${dpdk_LIBRARIES} ${CAT_LINKLIBS})
target_compile_options(cat PRIVATE ${CC_FLAGS})
add_executable(khat EXCLUDE_FROM_ALL net/khat.cc)
target_link_libraries(khat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(khat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})
add_executable(cat EXCLUDE_FROM_ALL net/cat.cc)
target_link_libraries(cat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(cat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})
add_executable(rat EXCLUDE_FROM_ALL net/rat.cc)
target_link_libraries(rat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(rat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})
add_executable(birb EXCLUDE_FROM_ALL storage/birb.cc storage/io_gen.cc storage/drivers/bdev.cc storage/drivers/bdev_thread.cc storage/drivers/nvme.cc storage/drivers/nvme_thread.cc)
target_include_directories(birb PRIVATE ${SPDK_INCLUDE_DIRS} ${DPDK_INCLUDE_DIRS} ${UUID_INCLUDE_DIRS})
target_compile_options(birb PRIVATE ${CC_FLAGS} ${SPDK_CFLAGS} ${UUID_CFLAGS})
target_link_directories(birb PRIVATE ${SPDK_LIBRARY_DIRS} ${SPDK_SYS_STATIC_LIBRARY_DIRS} ${UUID_LIBRARY_DIRS})
target_link_libraries(birb PRIVATE pthread ntr gen -Wl,--whole-archive ${SPDK_LIBRARIES} -Wl,--no-whole-archive ${SPDK_SYS_STATIC_LIBRARIES})
add_executable(birb_posix EXCLUDE_FROM_ALL storage/birb_posix.cc storage/io_gen.cc)
target_compile_options(birb_posix PRIVATE ${CC_FLAGS})
target_link_libraries(birb_posix PRIVATE pthread ntr gen)
add_executable(memloadgen util/memloadgen.cc)
target_link_libraries(memloadgen PRIVATE pthread gen ntr nms ${TOPO_LINK_LIBRARIES})
target_compile_options(memloadgen PRIVATE ${CC_FLAGS} ${TOPO_CFLAGS})
add_executable(mornafah util/mornafah.c)
target_link_libraries(mornafah PRIVATE pthread gen ntr nms ${TOPO_LINK_LIBRARIES})
target_compile_options(mornafah PRIVATE ${C_FLAGS} ${TOPO_CFLAGS})
add_executable(nms_test tests/nms_test.c)
set_target_properties(nms_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests)
target_link_libraries(nms_test PRIVATE nms)
target_compile_options(nms_test PRIVATE ${C_FLAGS})
add_executable(rat rat/rat.cc)
target_link_libraries(rat ${dpdk_LIBRARIES} ${RAT_LINKLIBS})
target_compile_options(rat PRIVATE ${CC_FLAGS})

213
FindHwloc.cmake Normal file
View File

@@ -0,0 +1,213 @@
#.rst:
# FindHwloc
# ----------
#
# Try to find Portable Hardware Locality (hwloc) libraries.
# http://www.open-mpi.org/software/hwloc
#
# You may declare HWLOC_ROOT environment variable to tell where
# your hwloc library is installed.
#
# Once done this will define::
#
# Hwloc_FOUND - True if hwloc was found
# Hwloc_INCLUDE_DIRS - include directories for hwloc
# Hwloc_LIBRARIES - link against these libraries to use hwloc
# Hwloc_VERSION - version
# Hwloc_CFLAGS - include directories as compiler flags
# Hwloc_LDLFAGS - link paths and libs as compiler flags
#
#=============================================================================
# Copyright 2014 Mikael Lepistö
#
# Distributed under the OSI-approved BSD License (the "License");
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
if(WIN32)
find_path(Hwloc_INCLUDE_DIR
NAMES
hwloc.h
PATHS
ENV "PROGRAMFILES(X86)"
ENV HWLOC_ROOT
PATH_SUFFIXES
include
)
find_library(Hwloc_LIBRARY
NAMES
libhwloc.lib
PATHS
ENV "PROGRAMFILES(X86)"
ENV HWLOC_ROOT
PATH_SUFFIXES
lib
)
#
# Check if the found library can be used to linking
#
SET (_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/linktest.c")
FILE (WRITE "${_TEST_SOURCE}"
"
#include <hwloc.h>
int main()
{
hwloc_topology_t topology;
int nbcores;
hwloc_topology_init(&topology);
hwloc_topology_load(topology);
nbcores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
hwloc_topology_destroy(topology);
return 0;
}
"
)
TRY_COMPILE(_LINK_SUCCESS ${CMAKE_BINARY_DIR} "${_TEST_SOURCE}"
CMAKE_FLAGS
"-DINCLUDE_DIRECTORIES:STRING=${Hwloc_INCLUDE_DIR}"
CMAKE_FLAGS
"-DLINK_LIBRARIES:STRING=${Hwloc_LIBRARY}"
)
IF(NOT _LINK_SUCCESS)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "You are building 64bit target.")
ELSE()
message(STATUS "You are building 32bit code. If you like to build x64 use e.g. -G 'Visual Studio 12 Win64' generator." )
ENDIF()
message(FATAL_ERROR "Library found, but linking test program failed.")
ENDIF()
#
# Resolve version if some compiled binary found...
#
find_program(HWLOC_INFO_EXECUTABLE
NAMES
hwloc-info
PATHS
ENV HWLOC_ROOT
PATH_SUFFIXES
bin
)
if(HWLOC_INFO_EXECUTABLE)
execute_process(
COMMAND ${HWLOC_INFO_EXECUTABLE} "--version"
OUTPUT_VARIABLE HWLOC_VERSION_LINE
OUTPUT_STRIP_TRAILING_WHITESPACE
)
string(REGEX MATCH "([0-9]+.[0-9]+)$"
Hwloc_VERSION "${HWLOC_VERSION_LINE}")
unset(HWLOC_VERSION_LINE)
endif()
#
# All good
#
set(Hwloc_LIBRARIES ${Hwloc_LIBRARY})
set(Hwloc_INCLUDE_DIRS ${Hwloc_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
Hwloc
FOUND_VAR Hwloc_FOUND
REQUIRED_VARS Hwloc_LIBRARY Hwloc_INCLUDE_DIR Hwloc_VERSION_PARSED Hwloc_VERSION_MAJOR Hwloc_VERSION_MINOR
VERSION_VAR Hwloc_VERSION)
mark_as_advanced(
Hwloc_INCLUDE_DIR
Hwloc_LIBRARY)
foreach(arg ${Hwloc_INCLUDE_DIRS})
set(Hwloc_CFLAGS "${Hwloc_CFLAGS} /I${arg}")
endforeach()
set(Hwloc_LDFLAGS "${Hwloc_LIBRARY}")
else()
if(CMAKE_CROSSCOMPILING)
find_path(Hwloc_INCLUDE_DIRS
NAMES
hwloc.h
PATHS
ENV HWLOC_ROOT
)
find_library(Hwloc_LIBRARIES
NAMES
hwloc
PATHS
ENV HWLOC_ROOT
)
if(Hwloc_INCLUDE_DIRS AND Hwloc_LIBRARIES)
message(WARNING "HWLOC library found using find_library() - cannot determine version. Assuming 1.7.0")
set(Hwloc_FOUND 1)
set(Hwloc_VERSION "1.7.0")
endif()
else() # Find with pkgconfig for non-crosscompile builds
find_package(PkgConfig)
if(HWLOC_ROOT)
set(ENV{PKG_CONFIG_PATH} "${HWLOC_ROOT}/lib/pkgconfig")
else()
foreach(PREFIX ${CMAKE_PREFIX_PATH})
set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:${PREFIX}/lib/pkgconfig")
endforeach()
set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}")
endif()
if(hwloc_FIND_REQUIRED)
set(_hwloc_OPTS "REQUIRED")
elseif(hwloc_FIND_QUIETLY)
set(_hwloc_OPTS "QUIET")
else()
set(_hwloc_output 1)
endif()
if(hwloc_FIND_VERSION)
if(hwloc_FIND_VERSION_EXACT)
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc=${hwloc_FIND_VERSION})
else()
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc>=${hwloc_FIND_VERSION})
endif()
else()
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc)
endif()
if(Hwloc_FOUND)
string(REPLACE "." ";" Hwloc_VERSION_PARSED "${Hwloc_VERSION}")
set(Hwloc_VERSION "${Hwloc_VERSION}" CACHE STRING "version of Hwloc as a list")
list(GET Hwloc_VERSION_PARSED 0 Hwloc_VERSION_MAJOR)
set(Hwloc_VERSION_MAJOR "${Hwloc_VERSION_MAJOR}" CACHE STRING "Major version of Hwloc")
list(GET Hwloc_VERSION_PARSED 1 Hwloc_VERSION_MINOR)
set(Hwloc_VERSION_MINOR "${Hwloc_VERSION_MINOR}" CACHE STRING "Minor version of Hwloc")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Hwloc DEFAULT_MSG Hwloc_LIBRARIES)
if(NOT ${Hwloc_VERSION} VERSION_LESS 1.7.0)
set(Hwloc_GL_FOUND 1)
endif()
if(_hwloc_output)
message(STATUS
"Found hwloc ${Hwloc_VERSION} in ${Hwloc_INCLUDE_DIRS}:${Hwloc_LIBRARIES}")
endif()
endif()
endif() # cross-compile else
endif()

142
Finddpdk.cmake Normal file
View File

@@ -0,0 +1,142 @@
# Try to find dpdk
#
# Once done, this will define
#
# dpdk::dpdk
# dpdk_FOUND
# dpdk_INCLUDE_DIR
# dpdk_LIBRARIES
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
pkg_check_modules(dpdk QUIET libdpdk)
endif()
# Resolve dpdk_INCLUDE_DIRS: prefer an already-populated variable (from
# pkg-config above), then an existing dpdk::dpdk target, and finally fall
# back to probing the filesystem for rte_config.h / rte_common.h.
if(dpdk_INCLUDE_DIRS)
  # good
elseif(TARGET dpdk::dpdk)
  get_target_property(dpdk_INCLUDE_DIRS
    dpdk::dpdk INTERFACE_INCLUDE_DIRECTORIES)
else()
  find_path(dpdk_config_INCLUDE_DIR rte_config.h
    HINTS
      ENV DPDK_DIR
    PATH_SUFFIXES
      dpdk
      include)
  # BUGFIX: "ENC DPDK_DIR" was a typo for "ENV DPDK_DIR"; the DPDK_DIR
  # environment hint was silently ignored when locating rte_common.h.
  find_path(dpdk_common_INCLUDE_DIR rte_common.h
    HINTS
      ENV DPDK_DIR
    PATH_SUFFIXES
      dpdk
      include)
  set(dpdk_INCLUDE_DIRS "${dpdk_config_INCLUDE_DIR}")
  # BUGFIX: EQUAL is a *numeric* comparison in CMake; comparing two
  # directory paths needs STREQUAL.
  if(NOT dpdk_config_INCLUDE_DIR STREQUAL dpdk_common_INCLUDE_DIR)
    list(APPEND dpdk_INCLUDE_DIRS "${dpdk_common_INCLUDE_DIR}")
  endif()
endif()
set(components
bus_pci
bus_vdev
cfgfile
cmdline
eal
ethdev
hash
kvargs
mbuf
mempool
mempool_ring
mempool_stack
net
pci
pmd_af_packet
pmd_bnxt
pmd_bond
pmd_cxgbe
pmd_e1000
pmd_ena
pmd_enic
pmd_i40e
pmd_ixgbe
pmd_mlx5
pmd_nfp
pmd_qede
pmd_ring
pmd_sfc_efx
pmd_vmxnet3_uio
ring
timer)
# for collecting dpdk library targets, it will be used when defining dpdk::dpdk
set(_dpdk_libs)
# for list of dpdk library archive paths
set(dpdk_LIBRARIES)
foreach(c ${components})
set(dpdk_lib dpdk::${c})
if(TARGET ${dpdk_lib})
get_target_property(DPDK_rte_${c}_LIBRARY
${dpdk_lib} IMPORTED_LOCATION)
else()
find_library(DPDK_rte_${c}_LIBRARY rte_${c}
HINTS
ENV DPDK_DIR
${dpdk_LIBRARY_DIRS}
PATH_SUFFIXES lib)
endif()
if(DPDK_rte_${c}_LIBRARY)
if (NOT TARGET ${dpdk_lib})
add_library(${dpdk_lib} UNKNOWN IMPORTED)
set_target_properties(${dpdk_lib} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${dpdk_INCLUDE_DIRS}"
IMPORTED_LOCATION "${DPDK_rte_${c}_LIBRARY}")
if(c STREQUAL pmd_mlx5)
find_package(verbs QUIET)
if(verbs_FOUND)
target_link_libraries(${dpdk_lib} INTERFACE IBVerbs::verbs)
endif()
endif()
endif()
list(APPEND _dpdk_libs ${dpdk_lib})
list(APPEND dpdk_LIBRARIES ${DPDK_rte_${c}_LIBRARY})
endif()
endforeach()
mark_as_advanced(dpdk_INCLUDE_DIRS ${dpdk_LIBRARIES})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(dpdk DEFAULT_MSG
dpdk_INCLUDE_DIRS
dpdk_LIBRARIES)
if(dpdk_FOUND)
if(NOT TARGET dpdk::cflags)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64")
set(rte_cflags "-march=core2")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|ARM")
set(rte_cflags "-march=armv7-a")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(rte_cflags "-march=armv8-a+crc")
endif()
add_library(dpdk::cflags INTERFACE IMPORTED)
if (rte_cflags)
set_target_properties(dpdk::cflags PROPERTIES
INTERFACE_COMPILE_OPTIONS "${rte_cflags}")
endif()
endif()
if(NOT TARGET dpdk::dpdk)
add_library(dpdk::dpdk INTERFACE IMPORTED)
find_package(Threads QUIET)
list(APPEND _dpdk_libs
Threads::Threads
dpdk::cflags)
set_target_properties(dpdk::dpdk PROPERTIES
INTERFACE_LINK_LIBRARIES "${_dpdk_libs}"
INTERFACE_INCLUDE_DIRECTORIES "${dpdk_INCLUDE_DIRS}")
endif()
endif()
unset(_dpdk_libs)

621
cat/cat.cc Normal file
View File

@@ -0,0 +1,621 @@
#include <cstdio>
#include <ctime>
#include <netinet/in.h>
#include <rte_config.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <atomic>
#include <vector>
#include <fstream>
#include <unistd.h>
#include "nm.h"
#include "gen.h"
#include "ntr.h"
#include "pkt.h"
#include "util.h"
// DPDK datapath tuning knobs: mempool capacity, per-lcore mbuf cache,
// RX/TX descriptor ring depths, and max packets per rte_eth_rx_burst().
constexpr static unsigned int MBUF_MAX_COUNT = 16384;
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
constexpr static unsigned int RX_RING_SIZE = 4096;
constexpr static unsigned int TX_RING_SIZE = 4096;
constexpr static unsigned int BURST_SIZE = 32;

// Zero-initialized base port config; port_init() ORs offload flags into a
// copy of this.
static const struct rte_eth_conf port_conf_default{};

// One probe/response round-trip measurement.
// clt_* are taken locally (rx/tx callbacks and locore_main); srv_* are
// copied out of the server's PKT_TYPE_STAT payload. hw timestamps come
// from the NIC via the rte_eth_timesync_* API (ns); sw timestamps are
// rte_rdtsc() values (TSC cycles).
struct datapt {
uint32_t epoch;
uint32_t valid;
uint64_t clt_hw_tx;
uint64_t clt_sw_tx;
uint64_t clt_hw_rx;
uint64_t clt_sw_rx;
uint64_t srv_hw_tx;
uint64_t srv_sw_tx;
uint64_t srv_hw_rx;
uint64_t srv_sw_rx;
};

// Global knobs (set from argv in main) plus runtime state (s_* fields)
// shared between main, the lcore loop and the rx/tx callbacks.
struct options_t {
// parameters
unsigned int run_time{5};
unsigned int warmup_time{3};
char output[256] = "output.txt";
char ia_gen_str[256] = "fixed:0.01";  // inter-arrival generator spec
struct rte_ether_addr server_mac;
uint64_t cpu_mask{0x2}; // 2nd core
std::vector<struct rte_ether_addr *> slaves;
unsigned long rage_quit_time = (unsigned long)-1;  // ms to wait for a stat before aborting
unsigned long last_sent_ts = 0;  // us timestamp of the last probe sent
// states
struct rte_mempool * mbuf_pool;
struct rte_ether_addr s_host_mac;
uint16_t s_portid;
unsigned int s_rxqid;
unsigned int s_txqid;
unsigned int s_total_pkts{0};
Generator * s_iagen{nullptr};  // owns the inter-arrival time generator
std::vector<struct datapt *> s_data;  // completed measurements
struct datapt * s_last_datapt{nullptr};  // in-flight probe, nullptr when idle
uint32_t s_epoch;  // next epoch number to send
std::atomic<bool> s_stop {false};
std::atomic<uint32_t> s_record {0};  // nonzero => mark new datapts valid
};

static struct options_t options;
static uint16_t
rx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *_ __rte_unused)
{
uint64_t now = rte_rdtsc();
struct pkt_hdr * pkt_data;
struct timespec ts;
int ret;
for (int i = 0; i < nb_pkts; i++) {
pkt_data = check_valid_packet(pkts[i]);
if (pkt_data == NULL) {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: ignoring invalid packet 0x%p.\n", (void*)pkts[i]);
continue;
}
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
uint32_t epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);
if (options.s_last_datapt != nullptr && options.s_last_datapt->epoch == epoch) {
if ((ret = rte_eth_timesync_read_rx_timestamp(port, &ts, pkts[i]->timesync & 0x3)) == 0) {
// has hw rx timestamp
options.s_last_datapt->clt_hw_rx = ts.tv_sec * S2NS + ts.tv_nsec;
options.s_last_datapt->clt_sw_rx = now;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: tagged packet %p with sw: %llu hw: %llu.\n", (void*)pkts[i], now, options.s_last_datapt->clt_hw_rx);
} else {
rte_exit(EXIT_FAILURE, "rx_add_timestamp: packet %p not tagged - hw ts not available - %d.\n", (void*)pkts[i], ret);
}
} else {
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p epoch %d != last epoch %d.\n", (void*)pkts[i], epoch, options.s_last_datapt->epoch);
}
} else {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
}
}
return nb_pkts;
}
/*
 * TX callback installed on queue 0 by port_init().
 *
 * Stamps the in-flight PKT_TYPE_PROBE with a software tx timestamp
 * (rte_rdtsc() cycles). A probe whose epoch does not match the
 * outstanding datapt is a protocol violation and aborts the run.
 *
 * BUGFIX: the old abort message dereferenced
 * options.s_last_datapt->epoch on the branch where s_last_datapt may be
 * nullptr; the two failure cases are now reported separately. The
 * "not tagged" debug log also printed the raw big-endian type; it now
 * byte-swaps it like rx_add_timestamp does.
 */
static uint16_t
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
{
	uint64_t now = rte_rdtsc();
	struct pkt_hdr *pkt_data;

	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i]);
		if (pkt_data == NULL) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: ignoring invalid packet 0x%p.\n", (void*)pkts[i]);
			continue;
		}

		if (rte_be_to_cpu_16(pkt_data->type) != PKT_TYPE_PROBE) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
			continue;
		}

		uint32_t epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);

		if (options.s_last_datapt == nullptr) {
			rte_exit(EXIT_FAILURE, "tx_add_timestamp: packet epoch %d with no outstanding datapt\n", epoch);
		}
		if (epoch != options.s_last_datapt->epoch) {
			rte_exit(EXIT_FAILURE, "tx_add_timestamp: packet epoch %d != last epoch %d\n", epoch, options.s_last_datapt->epoch);
		}

		options.s_last_datapt->clt_sw_tx = now;
		ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: tagged packet %p with sw: %llu.\n", (void*)pkts[i], now);
	}

	return nb_pkts;
}
/*
 * Client polling loop, run on one lcore until options.s_stop is set.
 *
 * Per iteration:
 *   1. drain the RX queue, matching PROBE_RESP / STAT packets against the
 *      in-flight datapt (epoch-checked) and filling in server timestamps;
 *   2. once the previous probe is fully accounted for (hw tx timestamp
 *      read, response and stat received), commit the datapt to
 *      options.s_data and, when the inter-arrival deadline has passed,
 *      send the next probe;
 *   3. rage-quit if the server stat is overdue;
 *   4. poll the NIC for the hw tx timestamp of the outstanding probe.
 *
 * BUGFIXES vs. the previous version:
 *   - tx_buf was freed after the loop: undefined behavior when no probe
 *     was ever sent (indeterminate pointer) and a double free otherwise,
 *     since a successful rte_eth_tx_burst() transfers mbuf ownership to
 *     the driver. The free is removed.
 *   - the epoch-mismatch warnings dereferenced a possibly-null
 *     s_last_datapt; the messages no longer touch it.
 *   - new datapt objects are now value-initialized so the debug dump
 *     never reads indeterminate fields.
 */
static int
locore_main(void *tif __rte_unused)
{
	struct rte_mbuf *tx_buf = nullptr; // driver-owned once sent
	struct rte_mbuf *rx_bufs[BURST_SIZE];
	struct pkt_hdr *pkt_data;
	uint32_t core_id = rte_lcore_id();
	int32_t ret;
	// state of the outstanding probe; all true == nothing in flight
	bool read_tx = true;   // hw tx timestamp harvested
	bool recv_stat = true; // server STAT received
	bool recv_resp = true; // PROBE_RESP received
	uint64_t next_ts;

	// XXX: check link status instead
	sleep(1);

	if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: WARNING, port %d is on remote NUMA node to "
		    "polling thread.\n\tPerformance will "
		    "not be optimal.\n", options.s_portid);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d running...\n", core_id);

	next_ts = get_time_us();
	while (!options.s_stop.load()) {
		uint64_t now = get_time_us();

		// always pop incoming packets
		const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, 0, rx_bufs, BURST_SIZE);
		for (int i = 0; i < nb_rx; i++) {
			struct pkt_hdr *each = check_valid_packet(rx_bufs[i]);
			if (each == NULL) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: ignoring invalid packet %p.\n", (void*)rx_bufs[i]);
				rte_pktmbuf_free(rx_bufs[i]);
				continue;
			}

			uint16_t type = rte_be_to_cpu_16(each->type);
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: received packet %p type %d.\n", (void*)rx_bufs[i], type);
			switch (type) {
				struct pkt_payload_epoch *pld_epoch;
				struct pkt_payload_stat *pld_stat;
				uint32_t epoch;
			case PKT_TYPE_PROBE_RESP:
				pld_epoch = (struct pkt_payload_epoch *)each->payload;
				epoch = rte_be_to_cpu_32(pld_epoch->epoch);
				if (options.s_last_datapt == nullptr || epoch != options.s_last_datapt->epoch) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match in-flight datapt.\n", (void*)rx_bufs[i], epoch);
					break;
				}
				options.s_total_pkts++;
				recv_resp = true;
				break;
			case PKT_TYPE_STAT:
				pld_stat = (struct pkt_payload_stat *)each->payload;
				epoch = rte_be_to_cpu_32(pld_stat->epoch);
				if (options.s_last_datapt == nullptr || epoch != options.s_last_datapt->epoch) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match in-flight datapt.\n", (void*)rx_bufs[i], epoch);
					break;
				}
				options.s_last_datapt->srv_hw_tx = rte_be_to_cpu_64(pld_stat->hw_tx);
				options.s_last_datapt->srv_hw_rx = rte_be_to_cpu_64(pld_stat->hw_rx);
				options.s_last_datapt->srv_sw_tx = rte_be_to_cpu_64(pld_stat->sw_tx);
				options.s_last_datapt->srv_sw_rx = rte_be_to_cpu_64(pld_stat->sw_rx);
				recv_stat = true;
				break;
			default:
				ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: ignoring packet %p with unknown type %d.\n", (void*)rx_bufs[i], type);
				break;
			}
			rte_pktmbuf_free(rx_bufs[i]);
		}

		if (read_tx && recv_stat && recv_resp) {
			// if we have all the data
			if (options.s_last_datapt != nullptr) {
				// push the data to the queue if we haven't done so already
				options.s_data.push_back(options.s_last_datapt);
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: datapt for epoch %d dump:\n"
				    " Valid: %d\n"
				    " client TX HW: %llu\n"
				    " client TX SW: %llu\n"
				    " client RX HW: %llu\n"
				    " client RX SW: %llu\n"
				    " server TX HW: %llu\n"
				    " server TX SW: %llu\n"
				    " server RX HW: %llu\n"
				    " server RX SW: %llu\n\n",
				    options.s_last_datapt->epoch,
				    options.s_last_datapt->valid,
				    options.s_last_datapt->clt_hw_tx,
				    options.s_last_datapt->clt_sw_tx,
				    options.s_last_datapt->clt_hw_rx,
				    options.s_last_datapt->clt_sw_rx,
				    options.s_last_datapt->srv_hw_tx,
				    options.s_last_datapt->srv_sw_tx,
				    options.s_last_datapt->srv_hw_rx,
				    options.s_last_datapt->srv_sw_rx);
				options.s_last_datapt = nullptr;
			}

			if (now >= next_ts) {
				struct pkt_payload_epoch *pld_epoch;
				uint32_t epoch;
				next_ts += (int)(options.s_iagen->generate() * 1000000.0);

				// generate the packet
				tx_buf = rte_pktmbuf_alloc(options.mbuf_pool);
				if (tx_buf == NULL) {
					rte_exit(EXIT_FAILURE, "cannot allocate tx_buf\n");
				}
				pkt_data = construct_pkt_hdr(tx_buf, PKT_TYPE_PROBE,
				    &options.s_host_mac, &options.server_mac);
				if (pkt_data == NULL) {
					rte_exit(EXIT_FAILURE, "cannot allocate space for packet_data in mbuf\n");
				}

				epoch = options.s_epoch;
				options.s_epoch++;
				pld_epoch = (struct pkt_payload_epoch *)pkt_data->payload;
				pld_epoch->epoch = rte_cpu_to_be_32(epoch);

				// value-initialize so unfilled timestamps read as 0
				options.s_last_datapt = new struct datapt();
				options.s_last_datapt->epoch = epoch;
				options.s_last_datapt->valid = options.s_record.load();

				read_tx = false;
				recv_resp = false;
				recv_stat = false;
				options.last_sent_ts = get_time_us();

				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: sending packet %p with epoch %d\n", (void*)tx_buf, epoch);
				const uint16_t nb_tx = rte_eth_tx_burst(options.s_portid, options.s_txqid, &tx_buf, 1);
				if (nb_tx != 1) {
					rte_exit(EXIT_FAILURE, "failed to send packet 0x%p, epoch %d\n", (void*)tx_buf, epoch);
				}
			}
		}

		if (!recv_stat) {
			// if we haven't received the stats get ready to rage quit
			if (get_time_us() - options.last_sent_ts > options.rage_quit_time * 1000) {
				rte_exit(EXIT_FAILURE, "waiting too long for resp. I QUIT!!\n");
			}
		}

		if (!read_tx) {
			struct timespec ts;
			if ((ret = rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts)) == 0) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: read hw tx timestamp %lld.\n", ts.tv_nsec + ts.tv_sec * S2NS);
				options.s_last_datapt->clt_hw_tx = ts.tv_nsec + ts.tv_sec * S2NS;
				read_tx = true;
			}
		}
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d successfully stopped.\n", core_id);
	return 0;
}
/*
 * Configure and start Ethernet port `portid`: one RX queue (fed by
 * mbuf_pool) and one TX queue, checksum offloads, IEEE1588 timesync,
 * promiscuous mode, and the rx/tx timestamping callbacks on queue 0.
 *
 * Returns 0 on success, -1 for an invalid port, or the negative error
 * from the failing rte_eth_* call.
 *
 * BUGFIX: max_rx_pkt_len was assigned twice — once redundantly before the
 * port-validity check; the single assignment is kept after dev_info_get.
 */
static int
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf = port_conf_default;
	struct rte_eth_txconf txconf;
	struct rte_eth_rxconf rxconf;
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;

	if (!rte_eth_dev_is_valid_port(portid)) {
		return -1;
	}

	int ret = rte_eth_dev_info_get(portid, &dev_info);
	if (ret != 0) {
		return ret;
	}

	port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	/* Configure the Ethernet device. */
	ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
	if (ret != 0)
		return ret;

	ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	/* Allocate and set up 1 RX queue per thread. */
	rxconf = dev_info.default_rxconf;
	rxconf.offloads = port_conf.rxmode.offloads;
	for (uint32_t i = 0; i < 1; i++) {
		ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
		if (ret < 0)
			return ret;
	}

	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	/* Allocate and set up 1 TX queue per Ethernet port. */
	for (uint32_t i = 0; i < 1; i++) {
		ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
		if (ret < 0)
			return ret;
	}

	ret = rte_eth_dev_start(portid);
	if (ret < 0)
		return ret;

	/* Fetch the port MAC address (validates the port responds). */
	struct rte_ether_addr addr;
	ret = rte_eth_macaddr_get(portid, &addr);
	if (ret != 0)
		return ret;

	ret = rte_eth_timesync_enable(portid);
	if (ret != 0)
		return ret;

	/* Enable RX in promiscuous mode for the Ethernet device. */
	ret = rte_eth_promiscuous_enable(portid);
	if (ret != 0)
		return ret;

	// install timestamping hooks on queue 0, the only queue configured
	rte_eth_add_tx_callback(portid, 0, tx_add_timestamp, NULL);
	rte_eth_add_rx_callback(portid, 0, rx_add_timestamp, NULL);

	return 0;
}
/*
 * Print the effective run configuration to stdout.
 *
 * BUGFIX: the MAC line printed options.server_mac but was labelled
 * "host MAC"; the label now matches the value actually printed.
 */
static void dump_options()
{
	fprintf(stdout, "Configuration:\n"
	    " run time = %d\n"
	    " warmup time = %d\n"
	    " output file = %s\n"
	    " rage quit time = %ld\n"
	    " server MAC = %x:%x:%x:%x:%x:%x\n",
	    options.run_time,
	    options.warmup_time,
	    options.output,
	    options.rage_quit_time,
	    options.server_mac.addr_bytes[0],
	    options.server_mac.addr_bytes[1],
	    options.server_mac.addr_bytes[2],
	    options.server_mac.addr_bytes[3],
	    options.server_mac.addr_bytes[4],
	    options.server_mac.addr_bytes[5]);
}
// Print the command-line help text to stdout and flush it so the text is
// visible even when the process exits immediately afterwards.
static void usage()
{
	static const char *help_text =
	    "Usage:\n "
	    " -v(vv): verbose mode\n"
	    " -s: server's mac\n"
	    " -S: slave(rat)'s mac\n"
	    " -t: run time\n"
	    " -T: warmup time\n"
	    " -h: display the information\n"
	    " -o: output filename\n"
	    " -A: affinity mask\n"
	    " -i: inter-arrival time distribution\n"
	    " -r: rage quit time (in ms)\n";
	fputs(help_text, stdout);
	fflush(stdout);
}
/*
 * Entry point for the probe client (cat): parses CLI options, initializes
 * DPDK/EAL and the probed port, launches the measurement loop on one
 * lcore, runs a warmup+measurement wall-clock timer, then dumps per-packet
 * timestamp records as CSV to the log file.
 */
int main(int argc, char* argv[])
{
	unsigned int nb_ports;
	struct rte_mempool *mbuf_pool;
	std::ofstream log_file;
	ntr_init();
	// topology discovery must succeed before any NUMA-aware allocation
	if (nm_init() != 0)
		rte_exit(EXIT_FAILURE, "failed to init libnm\n");
	// create default generator
	options.s_iagen = createGenerator(options.ia_gen_str);
	// init dpdk
	// EAL consumes its own arguments; the remainder is ours to getopt
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}
	argc -= ret;
	argv += ret;
	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		while((c = getopt(argc, argv, "vs:S:t:T:ho:A:i:r:")) != -1) {
			switch (c) {
				// declared before the first label so every case can use it
				struct rte_ether_addr * addr;
				case 'v':
					// each -v raises verbosity one level
					ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
					break;
				case 's':
					case 's':
					break;
				case 'S':
					// heap-allocated; owned by options.slaves for
					// the life of the process (never freed)
					addr = new struct rte_ether_addr;
					if (rte_ether_unformat_addr(optarg, addr) == -1) {
						rte_exit(EXIT_FAILURE, "cannot parse %s as mac address.\n", optarg);
					}
					options.slaves.push_back(addr);
					break;
				case 't':
					options.run_time = atoi(optarg);
					break;
				case 'T':
					options.warmup_time = atoi(optarg);
					break;
				case 'h':
					usage();
					rte_exit(EXIT_SUCCESS, "\n");
				case 'o':
					// NOTE(review): strncpy with size-1 relies on
					// options.output being zero-initialized for the
					// terminating NUL — confirm it is static storage
					strncpy(options.output, optarg, sizeof(options.output) - 1);
					break;
				case 'A':
					options.cpu_mask = strtoull(optarg, nullptr, 16);
					break;
				case 'i':
					strncpy(options.ia_gen_str, optarg, sizeof(options.ia_gen_str) - 1);
					// replace the default generator created above
					if (options.s_iagen != nullptr) {
						delete options.s_iagen;
					}
					options.s_iagen = createGenerator(options.ia_gen_str);
					if (options.s_iagen == nullptr) {
						rte_exit(EXIT_FAILURE, "invalid generator string %s\n", options.ia_gen_str);
					}
					break;
				case 'r':
					options.rage_quit_time = atoi(optarg);
					break;
				default:
					usage();
					rte_exit(EXIT_FAILURE, "unknown argument: %c\n", c);
					break;
			}
		}
	}
	// open log file for writing
	log_file.open(options.output, std::ofstream::out);
	if (!log_file) {
		rte_exit(EXIT_FAILURE, "failed to open log file %s\n", options.output);
	}
	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0) {
		rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
	}
	// use the first available port
	uint16_t portid = rte_eth_find_next(0);
	if (portid == RTE_MAX_ETHPORTS) {
		rte_exit(EXIT_FAILURE, "cannot find an available port\n");
	}
	options.s_portid = portid;
	// create a mbuf memory pool on the socket
	// pool is placed on the port's NUMA socket to avoid cross-node DMA
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(options.s_portid));
	if (mbuf_pool == nullptr) {
		rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
	}
	options.mbuf_pool = mbuf_pool;
	if (port_init(portid, mbuf_pool) != 0) {
		rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
	}
	if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
	}
	// NOTE(review): %x drops leading zeros of MAC octets; %02x would match
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n", portid,
	    options.s_host_mac.addr_bytes[0],
	    options.s_host_mac.addr_bytes[1],
	    options.s_host_mac.addr_bytes[2],
	    options.s_host_mac.addr_bytes[3],
	    options.s_host_mac.addr_bytes[4],
	    options.s_host_mac.addr_bytes[5]);
	dump_options();
	// give the port a moment to settle before traffic starts
	sleep(1);
	// pick the first CPU from the affinity mask for the worker lcore
	uint64_t cmask = options.cpu_mask;
	const int16_t core_id = cmask_get_next_cpu(&cmask);
	if (core_id == NEXT_CPU_NULL) {
		rte_exit(EXIT_FAILURE, "invalid cpu mask 0x%lx\n", cmask);
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: launching thread on core %d\n", core_id);
	if (rte_eal_remote_launch(locore_main, nullptr, core_id) != 0) {
		rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
	}
	// XXX: poor man's timer
	// 1 Hz polling loop: enable recording after warmup, stop after run_time
	uint32_t second = 0;
	while(true) {
		if (second >= options.warmup_time) {
			options.s_record.store(1);
		}
		if (second >= options.run_time + options.warmup_time) {
			options.s_stop.store(true);
			break;
		}
		usleep(S2US);
		second++;
	}
	if (rte_eal_wait_lcore(core_id) < 0)
		rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");
	uint32_t qps = 0;
	// dump stats
	// one CSV row per successfully-probed packet:
	// client sw rx/tx, client hw rx/tx, server sw rx/tx, server hw rx/tx
	for (auto it : options.s_data) {
		if (it->valid) {
			qps++;
			log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
			    << it->clt_hw_rx << ',' << it->clt_hw_tx << ','
			    << it->srv_sw_rx << ',' << it->srv_sw_tx << ','
			    << it->srv_hw_rx << ',' << it->srv_hw_tx << std::endl;
		}
	}
	log_file.close();
	// NOTE(review): "QPS" here is total valid packets, not per-second rate
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Processed %d packets in %d seconds, QPS: %d\n", qps, options.run_time, qps);
	// clean up
	rte_eth_dev_stop(portid);
	rte_eth_dev_close(portid);
	return 0;
}

13
compile_flags.txt Normal file
View File

@ -0,0 +1,13 @@
-xc++
-O2
-std=c++11
-Wall
-Wextra
-Werror
-I/usr/include/dpdk
-Iinc
-Wno-deprecated-declarations
-Wno-packed-not-aligned
-Wno-address-of-packed-member
-Wno-zero-length-array
-Wno-gnu-zero-variadic-macro-arguments

View File

@ -1,61 +0,0 @@
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <cstdio>
#include <sys/types.h>
#include <sys/cpuset.h>
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \
TypeName(const TypeName &) = delete; \
void operator=(const TypeName &) = delete
#define UNUSED __attribute__((unused))
constexpr static unsigned long S2NS = 1000000000UL;
constexpr static unsigned long S2US = 1000000UL;
constexpr static unsigned long MS2NS = 1000000UL;
constexpr static int NEXT_CPU_NULL = -1;
#if defined(__x86_64__)
/*
 * Pop the lowest set bit of *mask: returns its index and clears it.
 * Returns -1 (NEXT_CPU_NULL) when the mask is empty.
 * Fix: the old code always evaluated 1ul << (ffs - 1); with an empty
 * mask ffsll() returns 0, making that a shift by -1 — undefined behavior.
 */
static inline int
cmask_get_next_cpu(uint64_t *mask)
{
	if (*mask == 0)
		return -1; /* NEXT_CPU_NULL */
	int ffs = ffsll(*mask);
	*mask &= ~(1ul << (ffs - 1));
	return ffs - 1;
}
/*
 * Number of CPUs (set bits) in a 64-bit affinity mask.
 * Fix: __builtin_popcount takes unsigned int, silently truncating the
 * upper 32 bits of the mask; __builtin_popcountll counts all 64.
 */
static inline int
cmask_get_num_cpus(const uint64_t mask)
{
	return __builtin_popcountll(mask);
}
#endif
/* Monotonic uptime in nanoseconds (immune to wall-clock adjustments). */
static inline uint64_t
get_uptime()
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * S2NS + (uint64_t)ts.tv_nsec;
}
/*
 * Parse a comma-separated CPU list (e.g. "0,2,5") into a cpuset.
 * Mutates the input string (tokenizer semantics).
 * Fix: strtok keeps hidden static state and is not reentrant; use
 * strtok_r so concurrent/nested tokenization elsewhere can't corrupt it.
 */
static inline void
cpulist_to_cpuset(char * cpulist, cpuset_t * cpuset)
{
	char *saveptr;
	CPU_ZERO(cpuset);
	for (char *cpu = strtok_r(cpulist, ",", &saveptr); cpu != nullptr;
	     cpu = strtok_r(nullptr, ",", &saveptr)) {
		CPU_SET(atoi(cpu), cpuset);
	}
}
#define ATTR_UNUSED __attribute__((unused))

234
inc/gen.h Normal file
View File

@ -0,0 +1,234 @@
// modified from mutilate
// -*- c++ -*-
// 1. implement "fixed" generator
// 2. implement discrete generator
// 3. implement combine generator?
#pragma once
#include <netinet/in.h>
#include <string>
#include <vector>
#include <utility>
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "util.h"
#define D(fmt, ...)
#define DIE(fmt, ...) (void)0;
#define FNV_64_PRIME (0x100000001b3ULL)
#define FNV1_64_INIT (0xcbf29ce484222325ULL)
/* 64-bit FNV-1a hash over an arbitrary byte buffer. */
static inline uint64_t fnv_64_buf(const void* buf, size_t len) {
	const unsigned char *p = (const unsigned char *)buf;
	uint64_t h = 0xcbf29ce484222325ULL; /* FNV1_64_INIT */
	for (size_t i = 0; i < len; i++) {
		h ^= (uint64_t)p[i];
		h *= 0x100000001b3ULL; /* FNV_64_PRIME */
	}
	return h;
}
static inline uint64_t fnv_64(uint64_t in) { return fnv_64_buf(&in, sizeof(in)); }
// Generator syntax:
//
// \d+ == fixed
// n[ormal]:mean,sd
// e[xponential]:lambda
// p[areto]:scale,shape
// g[ev]:loc,scale,shape
// fb_value, fb_key, fb_rate
class Generator {
public:
  /*
   * Abstract base for sample distributions (inter-arrival times, sizes).
   * generate(U): draw one sample. Callers may supply a uniform variate
   * U in [0,1) for deterministic replay, or pass U < 0 (the default) to
   * let the generator draw its own randomness.
   * set_lambda(): optionally rescale the distribution to a target rate;
   * the default implementation is a no-op stub (DIE expands to (void)0).
   */
  Generator() {}
  // Generator(const Generator &g) = delete;
  // virtual Generator& operator=(const Generator &g) = delete;
  virtual ~Generator() {}
  virtual double generate(double U = -1.0) = 0;
  virtual void set_lambda(double) {DIE("set_lambda() not implemented");}
protected:
  std::string type;
};
/* Degenerate distribution: every sample is the same constant. */
class Fixed : public Generator {
public:
  Fixed(double _value = 1.0) : value(_value) { D("Fixed(%f)", value); }
  virtual double generate(double) { return value; }
  // Mean of the distribution is 1/lambda; non-positive rates give 0.
  virtual void set_lambda(double lambda) {
    value = (lambda > 0.0) ? 1.0 / lambda : 0.0;
  }
private:
  double value;
};
/* Uniform distribution on [0, scale). */
class Uniform : public Generator {
public:
  Uniform(double _scale) : scale(_scale) { D("Uniform(%f)", scale); }
  virtual double generate(double U = -1.0) {
    double u = (U < 0.0) ? drand48() : U;
    return scale * u;
  }
  // Mean is scale/2, so a target rate lambda implies scale = 2/lambda.
  virtual void set_lambda(double lambda) {
    scale = (lambda > 0.0) ? 2.0 / lambda : 0.0;
  }
private:
  double scale;
};
class Normal : public Generator {
public:
  // Normal(mean, sd) sampled via the Box-Muller transform.
  Normal(double _mean = 1.0, double _sd = 1.0) : mean(_mean), sd(_sd) {
    D("Normal(mean=%f, sd=%f)", mean, sd);
  }
  // NOTE(review): V is set to U instead of an independent draw (the
  // independent drand48() is commented out), so the output is a
  // deterministic function of U rather than a true Box-Muller normal —
  // presumably intentional for replayability; confirm.
  virtual double generate(double U = -1.0) {
    if (U < 0.0) U = drand48();
    double V = U; // drand48();
    double N = sqrt(-2 * log(U)) * cos(2 * M_PI * V);
    return mean + sd * N;
  }
  // Rescale so the mean equals 1/lambda (sd left unchanged).
  virtual void set_lambda(double lambda) {
    if (lambda > 0.0) mean = 1.0 / lambda;
    else mean = 0.0;
  }
private:
  double mean, sd;
};
/* Exponential distribution with rate lambda (inverse-CDF sampling). */
class Exponential : public Generator {
public:
  Exponential(double _lambda = 1.0) : lambda(_lambda) {
    D("Exponential(lambda=%f)", lambda);
  }
  // A non-positive rate degenerates to constant 0 (no drand48 consumed).
  virtual double generate(double U = -1.0) {
    if (lambda <= 0.0)
      return 0.0;
    double u = (U < 0.0) ? drand48() : U;
    return -log(u) / lambda;
  }
  virtual void set_lambda(double l) { lambda = l; }
private:
  double lambda;
};
/* Generalized Pareto distribution, sampled by inverse CDF. */
class GPareto : public Generator {
public:
  GPareto(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0)
      : loc(_loc), scale(_scale), shape(_shape) {
    assert(shape != 0.0);
    D("GPareto(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
  }
  virtual double generate(double U = -1.0) {
    double u = (U < 0.0) ? drand48() : U;
    return loc + scale * (pow(u, -shape) - 1) / shape;
  }
  // Solve scale from the target mean 1/lambda, keeping loc and shape.
  virtual void set_lambda(double lambda) {
    scale = (lambda <= 0.0) ? 0.0 :
        (1 - shape) / lambda - (1 - shape) * loc;
  }
private:
  double loc /* mu */;
  double scale /* sigma */, shape /* k */;
};
class GEV : public Generator {
public:
  // Generalized extreme value distribution; requires non-zero shape.
  GEV(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0) :
    e(1.0), loc(_loc), scale(_scale), shape(_shape) {
    assert(shape != 0.0);
    D("GEV(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
  }
  // Derived from an Exp(1) draw: loc + scale * (E^-shape - 1) / shape.
  virtual double generate(double U = -1.0) {
    return loc + scale * (pow(e.generate(U), -shape) - 1) / shape;
  }
private:
  Exponential e;
  double loc /* mu */, scale /* sigma */, shape /* k */;
};
class Discrete : public Generator {
public:
  // Owns the fallback generator and deletes it on destruction.
  ~Discrete() { delete def; }
  // _def: fallback generator used when the table doesn't cover U;
  // ownership is transferred. Defaults to Fixed(0.0).
  Discrete(Generator* _def = NULL) : def(_def) {
    if (def == NULL) def = new Fixed(0.0);
  }
  // Walk the (probability, value) table in insertion order and return
  // the first value whose cumulative probability exceeds U. When no
  // entry matches, delegate to the fallback with the caller's original
  // U (Uc), so the fallback can draw its own randomness if U < 0.
  virtual double generate(double U = -1.0) {
    double Uc = U;
    if (pv.size() > 0 && U < 0.0) U = drand48();
    double sum = 0;
    for (auto p: pv) {
      sum += p.first;
      if (U < sum) return p.second;
    }
    return def->generate(Uc);
  }
  // Register outcome v with probability mass p (masses should sum <= 1).
  void add(double p, double v) {
    pv.push_back(std::pair<double,double>(p, v));
  }
private:
  Generator *def;
  std::vector< std::pair<double,double> > pv;
};
class KeyGenerator {
public:
  // _g: distribution of key lengths; _max: key-space size used to derive
  // the minimum zero-padded key width (floor(log10(max)) + 1 digits).
  KeyGenerator(Generator* _g, double _max = 10000) : g(_g), max(_max) {}
  // Deterministically map an index to a decimal key string: the FNV hash
  // of ind supplies the uniform variate, so the same ind always produces
  // the same key length and key.
  std::string generate(uint64_t ind) {
    uint64_t h = fnv_64(ind);
    double U = (double) h / (double)ULLONG_MAX;
    double G = g->generate(U);
    int keylen = MAX(round(G), floor(log10(max)) + 1);
    char key[256];
    snprintf(key, 256, "%0*" PRIu64, keylen, ind);
    // D("%d = %s", ind, key);
    return std::string(key);
  }
private:
  Generator* g;
  double max;
};
Generator* createGenerator(std::string str);
Generator* createFacebookKey();
Generator* createFacebookValue();
Generator* createFacebookIA();

View File

@ -1,346 +0,0 @@
// modified from mutilate
// -*- c++ -*-
// 1. implement "fixed" generator
// 2. implement discrete generator
// 3. implement combine generator?
#pragma once
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <utility>
#include <vector>
#include <sys/_pthreadtypes.h>
#include <sys/param.h>
#include "defs.hh"
#define D(fmt, ...)
#define DIE(fmt, ...) (void)0;
#define FNV_64_PRIME (0x100000001b3ULL)
#define FNV1_64_INIT (0xcbf29ce484222325ULL)
static inline uint64_t
fnv_64_buf(const void *buf, size_t len)
{
uint64_t hval = FNV1_64_INIT;
unsigned char *bp = (unsigned char *)buf; /* start of buffer */
unsigned char *be = bp + len; /* beyond end of buffer */
while (bp < be) {
hval ^= (uint64_t)*bp++;
hval *= FNV_64_PRIME;
}
return hval;
}
static inline uint64_t
fnv_64(uint64_t in)
{
return fnv_64_buf(&in, sizeof(in));
}
// Generator syntax:
//
// \d+ == fixed
// n[ormal]:mean,sd
// e[xponential]:lambda
// p[areto]:scale,shape
// g[ev]:loc,scale,shape
// fb_value, fb_key, fb_rate
class Generator {
public:
Generator() { }
// Generator(const Generator &g) = delete;
// virtual Generator& operator=(const Generator &g) = delete;
virtual ~Generator() { }
virtual double generate(double U = -1.0) = 0;
virtual void set_lambda(double) { DIE("set_lambda() not implemented"); }
protected:
std::string type;
};
class Fixed : public Generator {
public:
Fixed(double _value = 1.0)
: value(_value)
{
D("Fixed(%f)", value);
}
virtual double generate(double) { return value; }
virtual void set_lambda(double lambda)
{
if (lambda > 0.0)
value = 1.0 / lambda;
else
value = 0.0;
}
private:
double value;
};
class Uniform : public Generator {
public:
Uniform(double _scale)
: scale(_scale)
{
D("Uniform(%f)", scale);
}
virtual double generate(double U = -1.0)
{
if (U < 0.0)
U = drand48();
return scale * U;
}
virtual void set_lambda(double lambda)
{
if (lambda > 0.0)
scale = 2.0 / lambda;
else
scale = 0.0;
}
private:
double scale;
};
class Normal : public Generator {
public:
Normal(double _mean = 1.0, double _sd = 1.0)
: mean(_mean)
, sd(_sd)
{
D("Normal(mean=%f, sd=%f)", mean, sd);
}
virtual double generate(double U = -1.0)
{
if (U < 0.0)
U = drand48();
double V = U; // drand48();
double N = sqrt(-2 * log(U)) * cos(2 * M_PI * V);
return mean + sd * N;
}
virtual void set_lambda(double lambda)
{
if (lambda > 0.0)
mean = 1.0 / lambda;
else
mean = 0.0;
}
private:
double mean, sd;
};
class Exponential : public Generator {
public:
Exponential(double _lambda = 1.0)
: lambda(_lambda)
{
D("Exponential(lambda=%f)", lambda);
}
virtual double generate(double U = -1.0)
{
if (lambda <= 0.0)
return 0.0;
if (U < 0.0)
U = drand48();
return -log(U) / lambda;
}
virtual void set_lambda(double lambda) { this->lambda = lambda; }
private:
double lambda;
};
class GPareto : public Generator {
public:
GPareto(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0)
: loc(_loc)
, scale(_scale)
, shape(_shape)
{
assert(shape != 0.0);
D("GPareto(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
}
virtual double generate(double U = -1.0)
{
if (U < 0.0)
U = drand48();
return loc + scale * (pow(U, -shape) - 1) / shape;
}
virtual void set_lambda(double lambda)
{
if (lambda <= 0.0)
scale = 0.0;
else
scale = (1 - shape) / lambda - (1 - shape) * loc;
}
private:
double loc /* mu */;
double scale /* sigma */, shape /* k */;
};
class GEV : public Generator {
public:
GEV(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0)
: e(1.0)
, loc(_loc)
, scale(_scale)
, shape(_shape)
{
assert(shape != 0.0);
D("GEV(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
}
virtual double generate(double U = -1.0)
{
return loc + scale * (pow(e.generate(U), -shape) - 1) / shape;
}
private:
Exponential e;
double loc /* mu */, scale /* sigma */, shape /* k */;
};
class Discrete : public Generator {
public:
~Discrete() { delete def; }
Discrete(Generator *_def = NULL)
: def(_def)
{
if (def == NULL)
def = new Fixed(0.0);
}
virtual double generate(double U = -1.0)
{
double Uc = U;
if (pv.size() > 0 && U < 0.0)
U = drand48();
double sum = 0;
for (auto p : pv) {
sum += p.first;
if (U < sum)
return p.second;
}
return def->generate(Uc);
}
void add(double p, double v)
{
pv.push_back(std::pair<double, double>(p, v));
}
private:
Generator *def;
std::vector<std::pair<double, double>> pv;
};
class KeyGenerator {
public:
KeyGenerator(Generator *_g, double _max = 10000)
: g(_g)
, max(_max)
{
}
std::string generate(uint64_t ind)
{
uint64_t h = fnv_64(ind);
double U = (double)h / (double)ULLONG_MAX;
double G = g->generate(U);
int keylen = MAX(round(G), floor(log10(max)) + 1);
char key[256];
snprintf(key, 256, "%0*" PRIu64, keylen, ind);
// D("%d = %s", ind, key);
return std::string(key);
}
private:
Generator *g;
double max;
};
Generator *createGenerator(std::string str);
Generator *createFacebookKey();
Generator *createFacebookValue();
Generator *createFacebookIA();
// memload generator
class memload_generator {
public:
struct memload_generator_options {
size_t transaction_size {4096};
size_t buffer_size {64*1024*1024};
char ia_dist[64]{"fixed"};
int verbose {0};
uint64_t trans_per_second;
bool shared_buffer {true};
};
private:
DISALLOW_EVIL_CONSTRUCTORS(memload_generator);
struct thread_info {
pthread_t pthr;
void *from_buffer;
void *to_buffer;
std::atomic<bool> reset_ts;
int tid;
int pull;
int coreid;
int target_dom;
struct memload_generator_options * opts;
Generator * ia_gen;
// stat keeping
std::atomic<uint32_t> num_trans;
std::atomic<int> * state;
std::atomic<int> init_status;
};
std::vector<struct thread_info *> thr_infos;
std::atomic<int> state;
static constexpr int STATE_RUN = 0;
static constexpr int STATE_RDY = 1;
static constexpr int STATE_END = 2;
static constexpr int STATE_INIT = 3;
static void *worker_thrd(void *_tinfo);
struct memload_generator_options opts;
public:
memload_generator(cpuset_t * threads, cpuset_t * modes, cpuset_t * target_domain, struct memload_generator_options * opt, bool *success);
uint64_t get_transactions();
bool start();
bool stop();
bool set_transactions(uint64_t tps);
~memload_generator();
};

View File

@ -1,133 +0,0 @@
#pragma once
#include <cstdint>
#include "rte_ethdev.h"
#include "rte_ether.h"
#define MAX_NUMA_NODES (64)
struct device_conf {
int portid;
uint16_t tx_ring_sz;
uint16_t rx_ring_sz;
cpuset_t core_affinity;
int mtu;
uint64_t rx_offloads;
uint64_t tx_offloads;
uint64_t rss_hf;
rte_tx_callback_fn tx_fn;
void * tx_user;
rte_rx_callback_fn rx_fn;
void * rx_user;
bool timesync;
};
struct mem_conf {
int num_elements;
int cache_size;
int data_room_size;
int priv_size;
unsigned int max_pools;
};
constexpr static uint16_t MIN_RANDOM_PORT = 1000;
constexpr static uint16_t DEFAULT_RAT_PORT = 1234;
constexpr static unsigned int INIT_DELAY = 3;
constexpr static unsigned int MAX_NODES = 64;
void
dpdk_init(struct device_conf *dconf, struct mem_conf *mconf);
void
dpdk_cleanup(struct device_conf *dconf);
struct rte_mempool *
mempool_get(int nodeid);
struct port_conf {
const char * driver_name;
uint64_t rxoffload;
uint64_t txoffload;
uint64_t rss_hf;
bool timesync;
};
int
portconf_get(int portid, struct port_conf * out);
// constexpr static int LATENCY_MEASURE_TIMES = 10000;
// static inline void
// sync_port_clock(uint16_t portid)
//{
// int64_t lat = 0;
// int64_t get_time_lat;
// int64_t write_time_lat;
// struct timespec dum;
// struct timespec start;
// struct timespec end;
//
// // measure clock_gettime latency
// for(int i = 0; i < LATENCY_MEASURE_TIMES; i++) {
// // end - start ~= 2x clock_gettime's latency
// clock_gettime(CLOCK_REALTIME, &start);
// clock_gettime(CLOCK_REALTIME, &dum);
// clock_gettime(CLOCK_REALTIME, &end);
//
// if (end.tv_sec != start.tv_sec) {
// rte_exit(EXIT_FAILURE, "clock_gettime too slow\n");
// }
//
// // shouldn't overflow
// lat += (end.tv_nsec - start.tv_nsec) / 2;
// }
// get_time_lat = lat / LATENCY_MEASURE_TIMES;
//
// // measure rte_eth_timesync_write_time latency
// lat = 0;
// for(int i = 0; i < LATENCY_MEASURE_TIMES; i++) {
// // end - start ~= rte_eth_timesync latency + clock_gettime's latency
// clock_gettime(CLOCK_REALTIME, &dum);
// clock_gettime(CLOCK_REALTIME, &start);
// if (rte_eth_timesync_write_time(portid, &dum) != 0) {
// rte_exit(EXIT_FAILURE, "failed to write time\n");
// }
// clock_gettime(CLOCK_REALTIME, &end);
//
// if (end.tv_sec != start.tv_sec) {
// rte_exit(EXIT_FAILURE, "clock_gettime too slow!\n");
// }
//
// // shouldn't overflow
// int64_t elat = (end.tv_nsec - start.tv_nsec) - get_time_lat;
// if (elat < 0) {
// rte_exit(EXIT_FAILURE, "something is wrong with lat \n");
// }
// lat += elat;
// }
// write_time_lat = lat / LATENCY_MEASURE_TIMES;
//
// int64_t delta = (get_time_lat + write_time_lat) / 2;
// int64_t s2ns = (int64_t)S2NS;
// // sync the clock
// while (true) {
// clock_gettime(CLOCK_REALTIME, &dum);
// dum.tv_nsec += delta;
// if (dum.tv_nsec > s2ns) {
// // try again if overflow
// continue;
// }
// if (rte_eth_timesync_write_time(portid, &dum) != 0) {
// rte_exit(EXIT_FAILURE, "failed to write time\n");
// }
// break;
// }
// rte_eth_timesync_enable(portid);
//
// printf("Sync-ed time: get lat %ld write lat %ld\n", get_time_lat,
// write_time_lat);
//}

View File

@ -1,490 +0,0 @@
#pragma once
#include <sys/endian.h>
#include <rte_byteorder.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_mbuf_core.h>
#include <rte_net.h>
#include <rte_udp.h>
#include <unistd.h>
#include "defs.hh"
#include <random>
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
#define IP_ADDR_FMT_SIZE 15
constexpr static uint32_t MAX_JUMBO_MTU = 9000;
constexpr static uint32_t MAX_STANDARD_MTU = 1500;
/* On-wire frame size for a given payload MTU: MTU + L2 header + CRC. */
static inline int
mtu_to_pkt_size(int mtu)
{
	return RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + mtu;
}
/* Keep calling rte_eth_tx_burst until every one of the sz mbufs has been
 * queued (a single burst may transmit fewer than requested). */
static inline void
tx_burst_all(int portid, int txqid, struct rte_mbuf ** tx_bufs, int sz)
{
	int sent = 0;
	while (sent < sz) {
		sent += rte_eth_tx_burst(
		    portid, txqid, &tx_bufs[sent], sz - sent);
	}
}
constexpr static uint32_t ETHER_FRAME_MAGIC = 0xDCDCE5E5;
const static struct rte_ether_addr POU_MAC {
0x01, 0x00, 0x5e, 0x00, 0x01, 0x81
};
const static uint32_t POU_IP = RTE_IPV4(224, 0, 1, 129);
const static uint16_t POU_PORT = 320;
/* Khat Protocol:
* khat only processes two kinds of packets - LOAD and PROBE
* rat:
* rat -> LOAD -> khat
* khat -> LOAD_RESP -> rat
* cat:
* cat -> PROBE -> khat (cat tx timestamps)
* khat -> PROBE_RESP -> cat (cat rx timestamps and khat tx/rx
* timestamps) khat -> STAT -> cat (khat sends its tx/rx timestamps)
*/
/* Rat Protocol:
* cat & rat:
* 1. both launch with full parameters
* rat with slave flag
* cat with master flag
* 2. rats create threads and wait for cat's signal
* 3. cat creates threads
* 4. cat -> rats SYNC
* 5. rats -> cat SYNC_ACK and start running
* 6. cat start running after received all SYNC_ACKs
* 7. cat stops running, cat -> rats FIN
* 8. rats stops running, rats -> cat FIN_ACK with QPS
* 9. cat exits after receiving all FIN_ACKs and flushing statsGG
*/
struct ptp_hdr {
uint8_t ptp_msg_type;
uint8_t ptp_ver;
uint8_t unused[34];
} __attribute__((packed));
struct pkt_hdr {
struct rte_ether_hdr eth_hdr;
struct rte_ipv4_hdr ipv4_hdr;
struct rte_udp_hdr udp_hdr;
struct ptp_hdr ptp_hdr;
uint16_t type;
uint32_t magic;
char payload[0];
} __attribute__((packed));
struct net_spec {
uint32_t ip;
rte_ether_addr mac_addr;
};
static inline void
pkt_hdr_to_netspec(struct pkt_hdr *pkt, struct net_spec *src,
uint16_t *src_port, struct net_spec *dst, uint16_t *dst_port)
{
if (src != nullptr) {
rte_ether_addr_copy(&pkt->eth_hdr.src_addr, &src->mac_addr);
src->ip = rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr);
}
if (src_port != nullptr) {
*src_port = rte_be_to_cpu_16(pkt->udp_hdr.src_port);
}
if (dst != nullptr) {
rte_ether_addr_copy(&pkt->eth_hdr.dst_addr, &dst->mac_addr);
dst->ip = rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr);
}
if (dst_port != nullptr) {
*dst_port = rte_be_to_cpu_16(pkt->udp_hdr.dst_port);
}
};
struct conn_spec {
struct net_spec *src;
uint16_t src_port;
struct net_spec *dst;
uint16_t dst_port;
};
// returns 0 on success
/*
 * Parse "a.b.c.d@xx:xx:xx:xx:xx:xx" into *out (IP then MAC).
 * Mutates the input string. Returns 0 on success, -1 on malformed input.
 * Fixes: scan with %u to match the unsigned arguments (%d with unsigned
 * destinations is a format/type mismatch), and reject octets > 255 which
 * previously silently wrapped into a bogus address.
 */
static inline int
str_to_netspec(char *str, struct net_spec *out)
{
	const char *tok = "@";
	char *token;
	char *ptr;
	unsigned int a, b, c, d;
	token = strtok_r(str, tok, &ptr);
	if (token == nullptr ||
	    sscanf(token, "%u.%u.%u.%u", &a, &b, &c, &d) != 4 ||
	    a > 255 || b > 255 || c > 255 || d > 255) {
		return -1;
	}
	out->ip = RTE_IPV4(a, b, c, d);
	// mac next
	token = strtok_r(nullptr, tok, &ptr);
	if (token == nullptr ||
	    rte_ether_unformat_addr(token, &out->mac_addr) != 0) {
		return -1;
	}
	return 0;
}
constexpr static uint16_t PKT_TYPE_LOAD = 0;
constexpr static uint32_t LOAD_TYPE_CPU = 0; // arg0 = cpu time in us. arg1 = unused
constexpr static uint32_t LOAD_TYPE_MEM = 1; // arg0 = which thread to access. arg1 = how many cachelines to access
constexpr static uint32_t LOAD_TYPE_MAX = LOAD_TYPE_MEM + 1;
struct pkt_payload_load {
uint32_t epoch;
uint32_t type; // type of load
uint32_t arg0;
uint32_t arg1;
};
constexpr static uint16_t PKT_TYPE_PROBE = 1;
constexpr static uint16_t PKT_TYPE_LOAD_RESP = 2;
constexpr static uint16_t PKT_TYPE_PROBE_RESP = 3;
struct pkt_payload_epoch {
uint32_t epoch;
};
constexpr static uint16_t PKT_TYPE_STAT = 4;
struct pkt_payload_stat {
uint32_t epoch;
uint64_t hw_rx;
uint64_t hw_tx;
uint64_t sw_rx;
uint64_t sw_tx;
};
constexpr static uint16_t PKT_TYPE_SYNC = 5;
constexpr static uint16_t PKT_TYPE_SYNC_ACK = 6;
constexpr static uint16_t PKT_TYPE_FIN = 7;
constexpr static uint16_t PKT_TYPE_FIN_ACK = 8;
struct pkt_payload_qps {
uint32_t qps;
uint32_t recved_pkts;
uint32_t lost_pkts;
};
constexpr static uint16_t NUM_PKT_TYPES = PKT_TYPE_FIN_ACK + 1;
// for fast packet verification
static const uint32_t expected_payload_size[NUM_PKT_TYPES] {
sizeof(struct pkt_payload_load), // LOAD
sizeof(struct pkt_payload_epoch), // PROBE
sizeof(struct pkt_payload_epoch), // LOAD_RESP
sizeof(struct pkt_payload_epoch), // PROBE_RESP
sizeof(struct pkt_payload_stat), // STAT
0, // SYNC
0, // SYNC_ACK
0, // FIN
sizeof(struct pkt_payload_qps) // FIN_ACK
};
class rdport_generator {
private:
DISALLOW_EVIL_CONSTRUCTORS(rdport_generator);
constexpr static uint32_t MAX_PORT = 65535;
uint32_t min_port;
uint32_t cur;
std::random_device rd;
std::default_random_engine gen;
std::uniform_int_distribution<uint32_t> dist;
public:
rdport_generator(uint32_t mport)
: min_port(mport)
, cur(0)
, dist(0, MAX_PORT - min_port)
{
gen.seed(get_uptime());
cur = dist(gen);
}
uint16_t next()
{
uint16_t ret = ((cur) % (MAX_PORT - min_port)) + min_port;
cur++;
return ret;
}
};
#define NTR_PKT(dep, level, pkt, prefix_fmt, ...) \
ntr(dep, level, \
prefix_fmt \
"src: %d.%d.%d.%d:%d@%02x:%02x:%02x:%02x:%02x:%02x dst: %d.%d.%d.%d:%d@%02x:%02x:%02x:%02x:%02x:%02x type: %d\n", \
##__VA_ARGS__, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 24) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 16) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 8) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 0) & 0xff, \
rte_be_to_cpu_16(pkt->udp_hdr.src_port), \
pkt->eth_hdr.src_addr.addr_bytes[0], \
pkt->eth_hdr.src_addr.addr_bytes[1], \
pkt->eth_hdr.src_addr.addr_bytes[2], \
pkt->eth_hdr.src_addr.addr_bytes[3], \
pkt->eth_hdr.src_addr.addr_bytes[4], \
pkt->eth_hdr.src_addr.addr_bytes[5], \
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 24) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 16) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 8) & 0xff, \
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 0) & 0xff, \
rte_be_to_cpu_16(pkt->udp_hdr.dst_port), \
pkt->eth_hdr.dst_addr.addr_bytes[0], \
pkt->eth_hdr.dst_addr.addr_bytes[1], \
pkt->eth_hdr.dst_addr.addr_bytes[2], \
pkt->eth_hdr.dst_addr.addr_bytes[3], \
pkt->eth_hdr.dst_addr.addr_bytes[4], \
pkt->eth_hdr.dst_addr.addr_bytes[5], rte_be_to_cpu_16(pkt->type))
/*
 * Print a MAC address as colon-separated hex.
 * Fix: %02x keeps each octet zero-padded; plain %x printed 0x0b as "b",
 * producing addresses like "0:1b:..." that don't parse back.
 */
static inline void
print_mac(struct rte_ether_addr *mac)
{
	printf("%02x:%02x:%02x:%02x:%02x:%02x", mac->addr_bytes[0],
	    mac->addr_bytes[1], mac->addr_bytes[2], mac->addr_bytes[3],
	    mac->addr_bytes[4], mac->addr_bytes[5]);
}
/* Dotted-quad print of a host-byte-order IPv4 address. */
static inline void
print_ipv4(uint32_t ip)
{
	int o0 = (ip >> 24) & 0xff;
	int o1 = (ip >> 16) & 0xff;
	int o2 = (ip >> 8) & 0xff;
	int o3 = (ip >> 0) & 0xff;
	printf("%d.%d.%d.%d", o0, o1, o2, o3);
}
static inline void
dump_pkt(struct rte_mbuf *pkt)
{
if (rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr)) {
return;
}
struct rte_ether_hdr _eth_hdr;
auto eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_read(
pkt, 0, sizeof(struct rte_ether_hdr), &_eth_hdr);
if (eth_hdr == nullptr) {
return;
}
// ethernet frame
printf(
"Packet %p: Length 0x%x\n", (void *)pkt, rte_pktmbuf_data_len(pkt));
printf(" Ethernet header:\n");
printf(" Src:");
print_mac(&eth_hdr->src_addr);
printf("\n");
printf(" Dst:");
print_mac(&eth_hdr->dst_addr);
printf("\n");
printf(" Type: 0x%x\n", rte_be_to_cpu_16(eth_hdr->ether_type));
uint16_t ether_type = rte_be_to_cpu_16(eth_hdr->ether_type);
if (ether_type != RTE_ETHER_TYPE_IPV4) {
return;
}
if (rte_pktmbuf_data_len(pkt) <
sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr)) {
return;
}
// dump ip header
auto ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
printf(" IPv4 header:\n");
printf(" Src:");
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->src_addr));
printf("\n");
printf(" Dst:");
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->dst_addr));
printf("\n");
printf(" Protocol: 0x%x\n", ipv4_hdr->next_proto_id);
}
/* PROBE traffic is the only kind routed to the PTP multicast address for
 * L2 hardware timestamping. */
static inline bool
is_l2ts_pkt(uint16_t type)
{
	switch (type) {
	case PKT_TYPE_PROBE:
	case PKT_TYPE_PROBE_RESP:
		return true;
	default:
		return false;
	}
}
// fills the packet with the information except for the payload itself
// Builds eth/ip/udp/ptp headers in-place in buf and returns a pointer to
// the packet header region (nullptr if the mbuf is too small). The frame
// is padded up to pkt_pad_sz when that exceeds header + expected payload.
static inline struct pkt_hdr *
construct_pkt_hdr(
    struct rte_mbuf *buf, uint16_t type, const struct conn_spec *conn, int pkt_pad_sz)
{
	rte_pktmbuf_reset(buf);
	int total_sz = sizeof(struct pkt_hdr) +
	    expected_payload_size[type];
	if (pkt_pad_sz > total_sz) {
		total_sz = pkt_pad_sz;
	}
	auto pkt_data = (struct pkt_hdr *)rte_pktmbuf_append(buf, total_sz);
	if (pkt_data == nullptr)
		return nullptr;
	struct rte_ether_hdr *eth_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	// PROBE/PROBE_RESP are addressed to the PTP multicast MAC/IP/port so
	// the NIC hardware-timestamps them
	bool is_ts_pkt = is_l2ts_pkt(type);
	// single segment
	buf->nb_segs = 1;
	// construct l2 header
	eth_hdr = &pkt_data->eth_hdr;
	rte_ether_addr_copy(&conn->src->mac_addr, &eth_hdr->src_addr);
	if (is_ts_pkt) {
		rte_ether_addr_copy(&POU_MAC, &eth_hdr->dst_addr);
	} else {
		rte_ether_addr_copy(&conn->dst->mac_addr, &eth_hdr->dst_addr);
	}
	eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
	buf->l2_len = sizeof(struct rte_ether_hdr);
	// construct l3 header
	ipv4_hdr = &pkt_data->ipv4_hdr;
	memset(ipv4_hdr, 0, sizeof(struct rte_ipv4_hdr));
	ipv4_hdr->version_ihl = IP_VHL_DEF;
	ipv4_hdr->type_of_service = 0;
	ipv4_hdr->fragment_offset = 0;
	ipv4_hdr->time_to_live = IP_DEFTTL;
	ipv4_hdr->next_proto_id = IPPROTO_UDP;
	ipv4_hdr->packet_id = 0;
	ipv4_hdr->src_addr = rte_cpu_to_be_32(conn->src->ip);
	if (is_ts_pkt) {
		ipv4_hdr->dst_addr = rte_cpu_to_be_32(POU_IP);
	} else {
		ipv4_hdr->dst_addr = rte_cpu_to_be_32(conn->dst->ip);
	}
	ipv4_hdr->total_length = rte_cpu_to_be_16(total_sz - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr));
	// checksum left zero: RTE_MBUF_F_TX_IP_CKSUM offloads it to the NIC
	ipv4_hdr->hdr_checksum = 0;
	buf->l3_len = sizeof(struct rte_ipv4_hdr);
	// construct l4 header
	udp_hdr = &pkt_data->udp_hdr;
	udp_hdr->src_port = rte_cpu_to_be_16(conn->src_port);
	if (is_ts_pkt) {
		udp_hdr->dst_port = rte_cpu_to_be_16(POU_PORT);
	} else {
		udp_hdr->dst_port = rte_cpu_to_be_16(conn->dst_port);
	}
	udp_hdr->dgram_cksum = 0; /* No UDP checksum. */
	// NOTE(review): dgram_len is stored in host byte order, unlike
	// total_length above which goes through rte_cpu_to_be_16 — this
	// looks like a missing byte swap; confirm against receivers and
	// the UDP checksum offload before changing.
	udp_hdr->dgram_len = total_sz - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr) - sizeof(struct rte_udp_hdr);
	buf->l4_len = sizeof(struct rte_udp_hdr);
	buf->ol_flags |= RTE_MBUF_F_TX_IPV4;
	buf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
	buf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
	if (is_ts_pkt) {
		// set misc flags
		buf->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST;
		pkt_data->ptp_hdr.ptp_ver = 0x2; // VER 2
		pkt_data->ptp_hdr.ptp_msg_type = 0x0; // SYNC
	} else {
		pkt_data->ptp_hdr.ptp_ver = 0xff; // invalid ver
	}
	pkt_data->type = rte_cpu_to_be_16(type);
	pkt_data->magic = rte_cpu_to_be_32(ETHER_FRAME_MAGIC);
	return pkt_data;
}
// returns 0 on success
static inline int
alloc_pkt_hdr(struct rte_mempool *pool, uint16_t type,
const struct conn_spec *conn, int pkt_pad_sz, struct rte_mbuf **mbuf_out,
struct pkt_hdr **hdr_out)
{
struct pkt_hdr *hdr;
struct rte_mbuf *pkt = rte_pktmbuf_alloc(pool);
if (pkt == nullptr) {
return -1;
}
// printf("alloc_pkt_hdr:\n");
// printf("from ");
// print_mac(&conn->src->mac_addr);
// printf("\nto ");
// print_mac(&conn->dst->mac_addr);
// printf("\n");
hdr = construct_pkt_hdr(pkt, type, conn, pkt_pad_sz);
if (hdr == nullptr) {
rte_pktmbuf_free(pkt);
return -1;
}
*mbuf_out = pkt;
*hdr_out = hdr;
return 0;
}
/* Validate an incoming mbuf as one of our protocol packets.
 * Returns the header pointer on success, nullptr otherwise. When
 * host_mac is non-null, the destination MAC is also checked strictly
 * (PTP multicast for timestamped probes, host_mac for everything else). */
static inline struct pkt_hdr *
check_valid_packet(struct rte_mbuf *pkt, const struct rte_ether_addr *host_mac)
{
	const uint32_t data_len = rte_pktmbuf_data_len(pkt);

	// must hold at least the full protocol header
	if (data_len < sizeof(struct pkt_hdr))
		return nullptr;

	struct pkt_hdr *hdr = rte_pktmbuf_mtod(pkt, struct pkt_hdr *);

	// check MAGIC
	if (rte_be_to_cpu_32(hdr->magic) != ETHER_FRAME_MAGIC)
		return nullptr;

	// check type and payload size
	const uint16_t type = rte_be_to_cpu_16(hdr->type);
	if (type >= NUM_PKT_TYPES)
		return nullptr;
	if (data_len < sizeof(struct pkt_hdr) + expected_payload_size[type])
		return nullptr;

	// strict dest mac filter
	if (host_mac != nullptr) {
		const struct rte_ether_addr *expected_mac =
		    is_l2ts_pkt(type) ? &POU_MAC : host_mac;
		if (!rte_is_same_ether_addr(
		    expected_mac, &hdr->eth_hdr.dst_addr))
			return nullptr;
	}
	return hdr;
}

16
inc/nm.h Normal file
View File

@ -0,0 +1,16 @@
#pragma once

#include <vector>

// Topology levels used by the nm_* queries: NUMA node > CPU package > core.
constexpr static int NM_LEVEL_NUMA = 0;
constexpr static int NM_LEVEL_CPU = 1;
constexpr static int NM_LEVEL_CORE = 2;

// Accessors for the detected topology objects.
// NOTE(review): presumably only valid after a successful nm_init() — confirm.
std::vector<struct nm_obj *> * nm_get_nodes();
std::vector<struct nm_obj *> * nm_get_cpus();
std::vector<struct nm_obj *> * nm_get_cores();

// Detects the machine topology.
// 0 on success
// -1 on error
int nm_init();

View File

@ -1,26 +0,0 @@
#pragma once

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

// Node-aware memory allocator interface (C linkage).
// NOTE(review): semantics inferred from names only — confirm against the
// implementation: nodeid presumably selects the NUMA domain to allocate from.

int
nms_init(int verbose);

void *
nms_malloc(int nodeid, size_t sz);

// *_static variants take/return sized buffers; the non-static pair is
// pointer-based — confirm exact ownership rules in the implementation.
void *
nms_alloc_static(int nodeid, size_t sz);

void
nms_free_static(void * buf, size_t sz);

void
nms_free(int nodeid, void * addr);

#ifdef __cplusplus
}
#endif // __cplusplus

View File

@ -1,7 +1,7 @@
#pragma once
#include <stdarg.h>
#include <stdio.h>
#include <stdarg.h>
#define NTR_LEVEL_NONE (0)
#define NTR_LEVEL_ERROR (1)
@ -20,16 +20,15 @@
#ifdef __cplusplus
extern "C" {
#endif
#endif
void ntr_init();
__attribute__((format(printf, 3, 4))) void ntr(
int dep, int level, const char *fmt, ...);
void ntr(int dep, int level, const char * fmt, ...);
void ntr_set_level(int dep, int level);
void ntr_set_output(FILE *f);
void ntr_set_output(FILE * f);
int ntr_get_level(int dep);

190
inc/pkt.h Normal file
View File

@ -0,0 +1,190 @@
#pragma once

#include <rte_mbuf_core.h>
#include <rte_mbuf.h>
#include <rte_udp.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <stdint.h>
#include <rte_flow.h>
#include <rte_ether.h>
#include <unistd.h>
#include <rte_net.h>
#include <rte_vxlan.h>

// Magic value carried by every experiment frame; used to filter out
// foreign traffic in check_valid_packet().
constexpr static uint32_t ETHER_FRAME_MAGIC = 0xDCDCE5E5;

// 01-1B-19-00-00-00 — the IEEE 1588 (PTP) forwardable multicast MAC;
// probe frames are addressed here so NICs hw-timestamp them.
const static struct rte_ether_addr PROBE_MAC_ADDR {0x01,0x1B,0x19,0x00,0x00,0x00};

// 0x88B5 — IEEE 802 "local experimental" EtherType, used for non-probe frames.
const static uint16_t ETHER_TYPE_LOCAL_EXP = 0x88b5;

// Mimics the leading bytes of a PTP header (36 bytes total) so that PTP-aware
// timestamping hardware recognizes the frame.
struct ptp_hdr {
	uint8_t ptp_msg_type;
	uint8_t ptp_ver;
	uint8_t unused[34];
} __attribute__((packed));

// On-wire frame layout: Ethernet + pseudo-PTP + experiment fields + payload.
// `type` and `magic` are big-endian on the wire.
struct pkt_hdr {
	struct rte_ether_hdr eth_hdr;
	struct ptp_hdr ptp_hdr;
	uint16_t type;
	uint32_t magic;
	char payload[0]; // trailing variable-length payload (GNU zero-length array)
} __attribute__((packed));

// Experiment packet types (host-order values of pkt_hdr.type).
constexpr static uint16_t PKT_TYPE_LOAD = 0;
constexpr static uint16_t PKT_TYPE_PROBE = 1;
constexpr static uint16_t PKT_TYPE_LOAD_RESP = 2;
constexpr static uint16_t PKT_TYPE_PROBE_RESP = 3;

// Payload of LOAD/PROBE and their responses: just an epoch counter (big-endian).
struct pkt_payload_epoch {
	uint32_t epoch;
};

constexpr static uint16_t PKT_TYPE_STAT = 4;
// Payload of STAT: the server's timestamps for one probe round (big-endian).
struct pkt_payload_stat {
	uint32_t epoch;
	uint64_t hw_rx;
	uint64_t hw_tx;
	uint64_t sw_rx;
	uint64_t sw_tx;
};

constexpr static uint16_t NUM_PKT_TYPES = PKT_TYPE_STAT + 1;

// for fast packet verification
// Indexed by packet type; must stay in sync with the PKT_TYPE_* list above.
static const uint32_t expected_payload_size[NUM_PKT_TYPES] {
	sizeof(struct pkt_payload_epoch),  // LOAD
	sizeof(struct pkt_payload_epoch),  // PROBE
	sizeof(struct pkt_payload_epoch),  // LOAD_RESP
	sizeof(struct pkt_payload_epoch),  // PROBE_RESP
	sizeof(struct pkt_payload_stat)    //STAT
};
// Prints a MAC address to stdout as colon-separated hex (no zero padding,
// no trailing newline).
static inline void
print_mac(struct rte_ether_addr * mac)
{
	for (int i = 0; i < 6; i++) {
		printf(i == 0 ? "%x" : ":%x", mac->addr_bytes[i]);
	}
}
// Prints a host-order IPv4 address to stdout as dash-separated decimal
// octets, most significant first (no trailing newline).
static inline void
print_ipv4(uint32_t ip)
{
	int octet[4];
	for (int i = 0; i < 4; i++) {
		octet[i] = (ip >> (24 - 8 * i)) & 0xff;
	}
	printf("%d-%d-%d-%d", octet[0], octet[1], octet[2], octet[3]);
}
// Debug helper: dumps the Ethernet header of `pkt` to stdout, and the IPv4
// header too when the EtherType is IPv4. Silently returns on truncated frames.
static inline void
dump_pkt(struct rte_mbuf *pkt)
{
	if(rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr)) {
		return;
	}

	// rte_pktmbuf_read copies into _eth_hdr only if the range spans segments;
	// otherwise it returns a pointer into the mbuf itself
	struct rte_ether_hdr _eth_hdr;
	struct rte_ether_hdr * eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_read(pkt, 0, sizeof(struct rte_ether_hdr), &_eth_hdr);
	if (eth_hdr == NULL) {
		return;
	}

	// ethernet frame
	printf("Packet %p: Length 0x%x\n", (void*)pkt, rte_pktmbuf_data_len(pkt));
	printf("  Ethernet header:\n");
	printf("    Src:");
	print_mac(&eth_hdr->s_addr);
	printf("\n");
	printf("    Dst:");
	print_mac(&eth_hdr->d_addr);
	printf("\n");
	printf("    Type: 0x%x\n", rte_be_to_cpu_16(eth_hdr->ether_type));

	uint16_t ether_type = rte_be_to_cpu_16(eth_hdr->ether_type);
	if (ether_type != RTE_ETHER_TYPE_IPV4) {
		return;
	}
	if(rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr)) {
		return;
	}

	// dump ip header
	// NOTE(review): assumes the IPv4 header is contiguous with the Ethernet
	// header in the first segment (eth_hdr + 1), unlike the rte_pktmbuf_read
	// used above — fine for single-segment mbufs.
	struct rte_ipv4_hdr * ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
	printf("  IPv4 header:\n");
	printf("    Src:");
	print_ipv4(rte_be_to_cpu_32(ipv4_hdr->src_addr));
	printf("\n");
	printf("    Dst:");
	print_ipv4(rte_be_to_cpu_32(ipv4_hdr->dst_addr));
	printf("\n");
	printf("    Protocol: 0x%x\n", ipv4_hdr->next_proto_id);
}
// fills the packet with the information except for the payload itself
// Resets `buf`, appends header + expected payload bytes for `type`, and
// builds the L2 + pseudo-PTP header. PROBE/PROBE_RESP frames are addressed
// to the PTP multicast MAC with the 1588 EtherType and request a hw tx
// timestamp; all other types go to `dst_mac` with the experimental EtherType
// and an invalid PTP version so timestamping hardware ignores them.
// Returns the in-mbuf header, or NULL if the mbuf is too small.
static inline
struct pkt_hdr * construct_pkt_hdr(struct rte_mbuf * buf, uint16_t type,
    struct rte_ether_addr * src_mac, struct rte_ether_addr * dst_mac)
{
	rte_pktmbuf_reset(buf);

	const uint32_t total_sz = sizeof(struct pkt_hdr) + expected_payload_size[type];
	struct pkt_hdr * pkt_data = (struct pkt_hdr *)rte_pktmbuf_append(buf, total_sz);
	struct rte_ether_hdr * eth_hdr;

	if (pkt_data == NULL)
		return NULL;

	// single segment
	buf->nb_segs = 1;

	// construct l2 header
	eth_hdr = &pkt_data->eth_hdr;
	rte_ether_addr_copy(src_mac, &eth_hdr->s_addr);
	if (type == PKT_TYPE_PROBE || type == PKT_TYPE_PROBE_RESP) {
		rte_ether_addr_copy(&PROBE_MAC_ADDR, &eth_hdr->d_addr);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_1588);
		pkt_data->ptp_hdr.ptp_ver = 0x2; // VER 2
		// ask the NIC to capture a hw tx timestamp for this frame
		buf->ol_flags |= PKT_TX_IEEE1588_TMST;
	} else {
		rte_ether_addr_copy(dst_mac, &eth_hdr->d_addr);
		eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_LOCAL_EXP);
		pkt_data->ptp_hdr.ptp_ver = 0xff; // invalid version: not a PTP frame
	}
	buf->l2_len = sizeof(struct rte_ether_hdr);

	pkt_data->ptp_hdr.ptp_msg_type = 0x0; // SYNC
	pkt_data->type = rte_cpu_to_be_16(type);
	pkt_data->magic = rte_cpu_to_be_32(ETHER_FRAME_MAGIC);

	return pkt_data;
}
static inline
struct pkt_hdr * check_valid_packet(struct rte_mbuf * pkt)
{
struct pkt_hdr * pkt_data = NULL;
const uint32_t data_len = rte_pktmbuf_data_len(pkt);
if (data_len < sizeof(struct pkt_hdr)) {
return NULL;
}
pkt_data = rte_pktmbuf_mtod(pkt, struct pkt_hdr *);
// check MAGIC
if (rte_be_to_cpu_32(pkt_data->magic) != ETHER_FRAME_MAGIC) {
return NULL;
}
// check type and payload size
if ((rte_be_to_cpu_16(pkt_data->type) < NUM_PKT_TYPES) &&
(data_len >= (sizeof(struct pkt_hdr) + expected_payload_size[rte_be_to_cpu_16(pkt_data->type)]))) {
return pkt_data;
}
return NULL;
}

View File

@ -1,56 +0,0 @@
#pragma once

#include "storage/drivers/driver.hh"

#include "spdk/bdev.h"
#include "spdk/bdev_zone.h"
#include "spdk/thread.h"

// SPDK bdev-backed implementation of the birb_driver interface.
class birb_bdev_driver : public birb_driver
{
	public:
		birb_bdev_driver(const char * dev_name);
		~birb_bdev_driver() override;
		size_t get_capacity() override;
		birb_driver_status get_status() override;
		struct spdk_bdev * get_bdev();
		struct spdk_bdev_desc * get_bdev_desc();
		birb_driver_type get_type() override;
		size_t get_align() override;
	private:
		DISALLOW_EVIL_CONSTRUCTORS(birb_bdev_driver);
		struct spdk_bdev_desc * bdev_desc;
		struct spdk_bdev * bdev;
		size_t block_sz;   // device block size in bytes — TODO confirm units
		size_t block_num;  // number of blocks on the device
		birb_driver_status status;
		static void print_all_bdev();
		// SPDK callback invoked on bdev hot events (e.g. removal)
		static void bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev * bdev,
								  void * event_ctx);
};

// Per-thread I/O context over a birb_bdev_driver: owns the SPDK io_channel
// and forwards read/write/poll to it.
class birb_bdev_thread_context : public birb_driver_thread_context
{
	public:
		birb_bdev_thread_context(birb_bdev_driver * driver);
		~birb_bdev_thread_context() override;
		int read(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
		int write(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
		void poll() override;
		birb_driver::birb_driver_status get_status() override;
	private:
		// pairs the user's completion callback with its opaque context
		struct cb_context {
			callback cb;
			void * ctx;
		};
		DISALLOW_EVIL_CONSTRUCTORS(birb_bdev_thread_context);
		spdk_io_channel * io_channel;
		birb_driver::birb_driver_status status;
		birb_bdev_driver * driver;
		// SPDK completion trampoline: unwraps cb_context and invokes cb
		static void io_callback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
};

View File

@ -1,47 +0,0 @@
#pragma once

#include "defs.hh"
#include "spdk/thread.h"
#include <cstdlib>

// Abstract storage driver: device-wide queries (capacity, alignment, type).
// Per-thread I/O goes through birb_driver_thread_context below.
class birb_driver
{
	private:
		DISALLOW_EVIL_CONSTRUCTORS(birb_driver);
	public:
		enum birb_driver_status{
			BIRB_SUCCESS,
			BIRB_FAIL
		};

		enum birb_driver_type{
			BIRB_DRV_NVME,
			BIRB_DRV_BDEV
		};

		virtual size_t get_capacity() = 0;
		virtual birb_driver_status get_status() = 0;
		virtual size_t get_align() = 0;
		virtual birb_driver_type get_type() = 0;
		virtual ~birb_driver() = default;
	protected:
		birb_driver() = default;
};

// Abstract per-thread I/O context: async read/write with completion
// callbacks; poll() drives completions on the owning thread.
class birb_driver_thread_context
{
	private:
		DISALLOW_EVIL_CONSTRUCTORS(birb_driver_thread_context);
	public:
		// completion callback: (success, user context)
		using callback = void (*)(bool, void *);
		virtual int read(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
		virtual int write(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
		virtual void poll() = 0;
		virtual birb_driver::birb_driver_status get_status() = 0;
		virtual ~birb_driver_thread_context() = default;
	protected:
		birb_driver_thread_context() = default;
};

View File

@ -1,65 +0,0 @@
#pragma once

#include "storage/drivers/driver.hh"
#include "spdk/nvme.h"
#include "spdk/thread.h"

// SPDK NVMe-backed implementation of the birb_driver interface.
class birb_nvme_driver : public birb_driver
{
	public:
		birb_nvme_driver(const char * dev_name);
		~birb_nvme_driver() override;
		size_t get_capacity() override;
		birb_driver_status get_status() override;
		birb_driver_type get_type() override;
		size_t get_align() override;
		spdk_nvme_ctrlr * get_ctrlr();
		spdk_nvme_ns * get_ns();
		spdk_nvme_io_qpair_opts * get_io_qpair_opts();
	private:
		// passed through SPDK probe/attach callbacks to locate `dev_name`
		struct attach_context {
			spdk_nvme_ctrlr ** ctrlr;
			spdk_nvme_ns ** ns;
			const char * dev_name;
			int valid;
		};
		DISALLOW_EVIL_CONSTRUCTORS(birb_nvme_driver);
		birb_driver_status status;
		spdk_nvme_ctrlr * ctrlr;
		spdk_nvme_ns * ns;
		spdk_nvme_io_qpair_opts opts;
		static bool probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr_opts *opts);
		static void attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
				struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts);
};

// Per-thread I/O context over a birb_nvme_driver: owns an NVMe queue pair
// and forwards read/write/poll to it.
class birb_nvme_thread_context : public birb_driver_thread_context
{
	public:
		birb_nvme_thread_context(birb_nvme_driver * driver);
		~birb_nvme_thread_context() override;
		int read(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
		int write(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
		void poll() override;
		birb_driver::birb_driver_status get_status() override;
	private:
		// pairs the user's completion callback with its opaque context
		struct cb_context {
			callback cb;
			void * ctx;
		};
		DISALLOW_EVIL_CONSTRUCTORS(birb_nvme_thread_context);
		birb_driver::birb_driver_status status;
		birb_nvme_driver * driver;
		struct spdk_nvme_qpair * qpair;
		// SPDK completion trampoline: unwraps cb_context and invokes cb
		static void io_callback(void *arg, const struct spdk_nvme_cpl *completion);
		// byte count/address -> LBA count/address conversions
		static uint32_t size_to_lba(size_t size, int lba_size);
		static uint64_t addr_to_lba(size_t addr, int lba_size);
};

View File

@ -1,47 +0,0 @@
#pragma once

#include "defs.hh"
#include "spdk/thread.h"
#include <cstdlib>

// Abstract storage driver: device-wide queries (capacity, alignment, type).
// Per-thread I/O goes through birb_driver_thread_context below.
class birb_driver
{
	private:
		DISALLOW_EVIL_CONSTRUCTORS(birb_driver);
	public:
		enum birb_driver_status{
			BIRB_SUCCESS,
			BIRB_FAIL
		};

		enum birb_driver_type{
			BIRB_DRV_NVME,
			BIRB_DRV_BDEV
		};

		virtual size_t get_capacity() = 0;
		virtual birb_driver_status get_status() = 0;
		virtual size_t get_align() = 0;
		virtual birb_driver_type get_type() = 0;
		virtual ~birb_driver() = default;
	protected:
		birb_driver() = default;
};

// Abstract per-thread I/O context: async read/write with completion
// callbacks; poll() drives completions on the owning thread.
class birb_driver_thread_context
{
	private:
		DISALLOW_EVIL_CONSTRUCTORS(birb_driver_thread_context);
	public:
		// completion callback: (success, user context)
		using callback = void (*)(bool, void *);
		virtual int read(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
		virtual int write(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
		virtual void poll() = 0;
		virtual birb_driver::birb_driver_status get_status() = 0;
		virtual ~birb_driver_thread_context() = default;
	protected:
		birb_driver_thread_context() = default;
};

View File

@ -1,53 +0,0 @@
#pragma once

#include <sys/endian.h>
#include <sys/types.h>

#include "defs.hh"
#include "gen.hh"

#include <random>

enum io_generator_opcode {
	IOGEN_READ,
	IOGEN_WRITE
};

// How successive request offsets are chosen.
enum io_generator_address_mode {
	IOGEN_ADDR_MONOTONIC_INCREASING,
	IOGEN_ADDR_UNIFORM_RANDOM
};

// One generated request: size, starting offset and operation.
struct io_generator_ctx {
	unsigned long size;
	uint64_t offset;
	io_generator_opcode op;
};

//
// cur_offset is aligned to req_size
//
// Synthesizes a read/write workload: fixed request size, read_pct percent
// reads, offsets chosen per addr_mode within [0, capacity).
class io_generator {
	public:
		// Fills *ctx with the next request (and presumably prepares `buf`
		// for writes — confirm in the implementation).
		int issue(struct io_generator_ctx * ctx, char * buf);
		io_generator(unsigned long req_size,
					 unsigned long capacity,
					 unsigned int read_pct,   // 0..100 — TODO confirm range
					 io_generator_address_mode addr_mode);
		io_generator() = delete;
	private:
		unsigned long cur_offset;
		const unsigned long capacity;
		const unsigned long req_size;
		const unsigned int read_pct;
		const io_generator_address_mode addr_mode;
		// read/write decision RNG
		std::random_device rd;
		std::mt19937 rng;
		std::uniform_int_distribution<int> dist;
		// address selection RNG (kept separate from the decision RNG)
		std::random_device addr_rd;
		std::mt19937 addr_rng;
		std::uniform_int_distribution<uint64_t> addr_dist;
		DISALLOW_EVIL_CONSTRUCTORS(io_generator);
};

33
inc/util.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once

#include <stdint.h>
#include <time.h>

#include <rte_ip.h>

// Unit-conversion factors.
// BUG FIX: S2NS was 100000000UL (1e8) — one zero short of seconds->nanoseconds.
// It is used as `ts.tv_nsec + ts.tv_sec * S2NS`, so timestamps were off by 10x.
constexpr static unsigned long S2NS = 1000000000UL; // seconds -> nanoseconds
constexpr static unsigned long S2US = 1000000L;     // seconds -> microseconds

constexpr static uint16_t SERVER_LOAD_PORT = 1234;
constexpr static uint16_t SERVER_PROBE_PORT = 319; // PTP event UDP port
constexpr static uint32_t SERVER_IP = RTE_IPV4(192,168,123,0);
// Wall-clock time in microseconds since the Unix epoch.
static inline uint64_t
get_time_us()
{
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);
	return (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000;
}
constexpr static int NEXT_CPU_NULL = -1;
// Pops the lowest set CPU bit from *mask and returns its index (0-based).
// Returns NEXT_CPU_NULL (-1) when the mask is empty.
static inline int
cmask_get_next_cpu(uint64_t * mask)
{
	int ffs = ffsll(*mask);
	if (ffs == 0) {
		// empty mask: the old code fell through to `1 << -1` (UB)
		return -1; // == NEXT_CPU_NULL
	}
	// BUG FIX: was `1 << (ffs - 1)` — a 32-bit int shift, which cannot
	// clear bits >= 31 of a 64-bit mask (UB for shifts >= 31)
	*mask &= ~(1ULL << (ffs - 1));
	return ffs - 1;
}
// Number of CPUs (set bits) in the mask.
static inline int
cmask_get_num_cpus(const uint64_t mask)
{
	// __builtin_popcountll replaces _mm_popcnt_u64: this header never
	// included <immintrin.h>, so the intrinsic only compiled via transitive
	// includes (and required SSE4.2); the builtin has no such dependency.
	return __builtin_popcountll(mask);
}

557
khat/khat.cc Normal file
View File

@ -0,0 +1,557 @@
#include <cstdio>
#include <cassert>
#include <ctime>
#include <netinet/in.h>
#include <rte_config.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_log.h>
#include <atomic>
#include <vector>
#include <fstream>
#include <unistd.h>
#include <signal.h>
#include "nm.h"
#include "pkt.h"
#include "ntr.h"
#include "util.h"
/* Protocol:
* regular client:
* client -> LOAD -> server
* server -> LOAD_RESP -> client
* measuring client:
* client -> PROBE -> server (client tx timestamps)
* server -> PROBE_RESP -> client (client rx timestamps and server tx/rx timestamps)
* server -> STAT -> client (server sends its tx/rx timestamps)
*/
// Sentinel stored in an mbuf's userdata to mark the single in-flight probe.
static void * const PROBE_MAGIC = (void*)0x12344444;

constexpr static unsigned int MBUF_MAX_COUNT = 65536;
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
constexpr static unsigned int RX_RING_SIZE = 4096;
constexpr static unsigned int TX_RING_SIZE = 4096;
constexpr static unsigned int BURST_SIZE = 32;

static const struct rte_eth_conf port_conf_default{};

// keep track of the probe state
// when a probe packet first arrives this state is set to be influx and the rte_mbuf's userdata is set to PROBE_MAGIC
// which prevents other probe packets to be processed
// when the server sends the probe stats back to user influx is released
// this is to guarantee that the server only processes one probe packet at the time
// XXX: also this can be attached to the mbuf itself and processed by the lcore thread
// I kept this global because globally there could be only one pending probe request
// and rx_add_timestamp can save their shit here too
struct probe_state_t {
	struct rte_ether_hdr hdr;  // L2 header of the probe, used to address the STAT reply
	uint32_t epoch;
	uint32_t timesync;         // NIC timesync slot id from the probe mbuf
	uint64_t last_sw_rx;       // sw/hw rx and sw tx timestamps for the current probe
	uint64_t last_sw_tx;
	uint64_t last_hw_rx;
};

// Per-worker-thread identity: queue ids and the lcore it runs on.
struct thread_info {
	int tid;
	int rxqid;
	int txqid;
	int lcore_id;
};

// state machine:
constexpr static int SERVER_STATE_WAIT = 0;   // no probe in flight
constexpr static int SERVER_STATE_PROBE = 1;  // a probe is being processed

// Global configuration plus all mutable server state (single instance).
struct options_t {
	//config
	int num_threads{1};
	uint64_t cpuset{0b010}; //2nd core
	//states
	uint16_t s_portid;
	struct rte_ether_addr s_host_mac;
	struct rte_mempool * s_pkt_mempool;
	std::atomic<int> s_state {SERVER_STATE_WAIT};
	struct probe_state_t s_probe_info;
	std::vector<struct thread_info *> s_thr_info;
};

static struct options_t options;
// RX callback: for PROBE packets, atomically claims the global probe slot
// (WAIT -> PROBE), records the sw (rdtsc) and hw rx timestamps, and marks the
// mbuf's userdata with PROBE_MAGIC so exactly one lcore processes it.
// Always returns nb_pkts (never drops packets).
// FIX: `port` was annotated __rte_unused but is passed to
// rte_eth_timesync_read_rx_timestamp() below.
static uint16_t
rx_add_timestamp(uint16_t port, uint16_t qidx __rte_unused,
	struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *_ __rte_unused)
{
	uint64_t now = rte_rdtsc();
	struct timespec ts;
	struct pkt_hdr * pkt_data;

	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i]);

		if (pkt_data == NULL) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: ignoring invalid packet %p.\n", (void*)pkts[i]);
			continue;
		}

		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
			int state_wait = SERVER_STATE_WAIT;
			pkts[i]->userdata = nullptr;
			// hw rx timestamp must be available before we claim the probe slot
			if (rte_eth_timesync_read_rx_timestamp(port, &ts, pkts[i]->timesync & 0x3) == 0) {
				if (options.s_state.compare_exchange_strong(state_wait, SERVER_STATE_PROBE)) {
					// mark the mbuf as probe packet being processed
					// only the locore that receives the pkt w/ userdata != nullptr processes that packet
					pkts[i]->userdata = PROBE_MAGIC;

					// tag with timestamps
					options.s_probe_info.last_hw_rx = ts.tv_nsec + ts.tv_sec * S2NS;
					options.s_probe_info.last_sw_rx = now;
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: tagged packet %p epoch %d with sw: %llu hw:%llu.\n", (void*)pkts[i], options.s_probe_info.epoch, now, options.s_probe_info.last_hw_rx);
				} else
					ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p not tagged - server is processing a probe.\n", (void*)pkts[i]);
			} else
				ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p not tagged - hw rx timestamp not available.\n", (void*)pkts[i]);
		} else
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
	}

	return nb_pkts;
}
// TX callback: software-timestamps outgoing PROBE_RESP packets just before
// they are handed to the NIC. The probe state must still be SERVER_STATE_PROBE
// and the mbuf must carry PROBE_MAGIC, otherwise the pipeline is broken and
// we abort. Always returns nb_pkts.
static uint16_t
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
	struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
{
	uint64_t now = rte_rdtsc();
	struct pkt_hdr * pkt_data;

	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i]);

		if (pkt_data == NULL) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: ignoring invalid packet %p.\n", (void*)pkts[i]);
			continue;
		}

		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
			// this packet is the response to PROBE packets

			// at this time the packet is not sent to the NIC yet so
			// the state must be waiting stats
			// XXX: this should be an assert
			if(options.s_state.load() != SERVER_STATE_PROBE || pkts[i]->userdata != PROBE_MAGIC) {
				rte_exit(EXIT_FAILURE, "packet %p sent to NIC before sw callback\n", (void*)pkts[i]);
			}

			options.s_probe_info.last_sw_tx = now;
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: tagged packet %p with sw tx %llu\n", (void*)pkts[i], options.s_probe_info.last_sw_tx);
		} else {
			// FIX: log the host-order type; the raw field is big-endian and
			// every other log site converts it first
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: packet %p not tagged - type %d\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
		}
	}

	return nb_pkts;
}
// Worker loop run on each lcore (`ti` is a struct thread_info *).
// Polls the RX queue: replies to LOAD with LOAD_RESP unconditionally, and to
// the claimed PROBE (userdata == PROBE_MAGIC) with PROBE_RESP. After sending
// a PROBE_RESP it polls for the hw tx timestamp and, once available, sends a
// STAT packet with all four timestamps and releases the probe slot.
// Never returns in normal operation.
static int
locore_main(void * ti)
{
	struct thread_info * tinfo = (struct thread_info *)ti;
	struct rte_mbuf *bufs[BURST_SIZE];
	// + 1 because it might involve an extra PKT_TYPE_STAT packet
	// when all tx timestamps are ready
	struct rte_mbuf *tx_bufs[BURST_SIZE];
	struct pkt_hdr *pkt_data;
	// true between sending a PROBE_RESP and obtaining its hw tx timestamp
	bool pending_probe = false;

	if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
						"polling thread.\n\tPerformance will "
						"not be optimal.\n", tinfo->tid, options.s_portid);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: running on locore %d with txidx %d and rxidx %d.\n", tinfo->tid, rte_lcore_id(), tinfo->txqid, tinfo->rxqid);

	while(true) {
		uint16_t nb_tx = 0;
		const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, tinfo->rxqid, bufs, BURST_SIZE);
		struct rte_mbuf * pkt_buf;
		struct pkt_hdr * tx_data;

		for(int i = 0; i < nb_rx; i++) {
			// XXX: optimization: in rx_add_timestamp every packet is already validated once
			// can just mark valid packet with a value so we can avoid this redundant check
			pkt_data = check_valid_packet(bufs[i]);

			if (pkt_data == NULL) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main <thread %d>: skipping invalid packet %p.\n", tinfo->tid, (void*)bufs[i]);
				//dump_pkt(bufs[i]);
				rte_pktmbuf_free(bufs[i]);
				continue;
			}

			ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: packet %p from %x:%x:%x:%x:%x:%x to %x:%x:%x:%x:%x:%x, type %d\n",
					tinfo->tid,
					(void*)bufs[i],
					pkt_data->eth_hdr.s_addr.addr_bytes[0],
					pkt_data->eth_hdr.s_addr.addr_bytes[1],
					pkt_data->eth_hdr.s_addr.addr_bytes[2],
					pkt_data->eth_hdr.s_addr.addr_bytes[3],
					pkt_data->eth_hdr.s_addr.addr_bytes[4],
					pkt_data->eth_hdr.s_addr.addr_bytes[5],
					pkt_data->eth_hdr.d_addr.addr_bytes[0],
					pkt_data->eth_hdr.d_addr.addr_bytes[1],
					pkt_data->eth_hdr.d_addr.addr_bytes[2],
					pkt_data->eth_hdr.d_addr.addr_bytes[3],
					pkt_data->eth_hdr.d_addr.addr_bytes[4],
					pkt_data->eth_hdr.d_addr.addr_bytes[5],
					rte_be_to_cpu_16(pkt_data->type));

			switch (rte_be_to_cpu_16(pkt_data->type)) {
				case PKT_TYPE_PROBE: {
					// only process the probe that rx_add_timestamp claimed for us
					if (options.s_state.load() == SERVER_STATE_PROBE && bufs[i]->userdata == PROBE_MAGIC) {
						// send back probe_resp pkt to probe for return latency
						pending_probe = true;

						// book keep probe results
						options.s_probe_info.epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);
						options.s_probe_info.timesync = bufs[i]->timesync;

						rte_memcpy(&options.s_probe_info.hdr, &pkt_data->eth_hdr, sizeof(struct rte_ether_hdr));

						pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
						if (pkt_buf == NULL) {
							rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
						}

						tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_PROBE_RESP,
													&options.s_host_mac,
													&pkt_data->eth_hdr.s_addr);
						if (tx_data == NULL) {
							rte_exit(EXIT_FAILURE, "failed to construct tx packet %p", (void*)pkt_buf);
						}

						// echo the probe's epoch back to the client
						rte_memcpy(tx_data->payload, pkt_data->payload, sizeof(struct pkt_payload_epoch));
						pkt_buf->userdata = PROBE_MAGIC;

						// queue for burst send
						tx_bufs[nb_tx++] = pkt_buf;
					}
					break;
				}
				case PKT_TYPE_LOAD: {
					// we reply to load packet regardless of the server state
					pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
					if (pkt_buf == NULL) {
						rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
					}

					tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_LOAD_RESP,
												&options.s_host_mac,
												&pkt_data->eth_hdr.s_addr);
					if (tx_data == NULL) {
						rte_exit(EXIT_FAILURE, "failed to construct tx packet %p", (void*)pkt_buf);
					}

					rte_memcpy(tx_data->payload, pkt_data->payload, sizeof(struct pkt_payload_epoch));

					// queue for burst send
					tx_bufs[nb_tx++] = pkt_buf;
					break;
				}
				default:
					break;
			}

			rte_pktmbuf_free(bufs[i]);
		}

		// send the packets
		if (nb_tx > 0) {
			const uint16_t nb_tx_succ = rte_eth_tx_burst(options.s_portid, tinfo->txqid, tx_bufs, nb_tx);
			if (nb_tx_succ < nb_tx) {
				rte_exit(EXIT_FAILURE, "failed to send some packets.\n");
			}
		}

		// we wanna check every loop not only when there are packets
		if (pending_probe) {
			struct timespec ts;
			struct pkt_payload_stat * stat;
			if (rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts) == 0) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main <thread %d>: obtained hw tx timestamp %lld.\n", tinfo->tid, ts.tv_sec * S2NS + ts.tv_nsec);
				// now we have everything we need

				pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
				if (pkt_buf == NULL) {
					rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
				}

				tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_STAT,
											&options.s_host_mac,
											&options.s_probe_info.hdr.s_addr);

				// populate stats
				stat = (struct pkt_payload_stat *)tx_data->payload;
				stat->epoch = rte_cpu_to_be_32(options.s_probe_info.epoch);
				stat->hw_rx = rte_cpu_to_be_64(options.s_probe_info.last_hw_rx);
				stat->hw_tx = rte_cpu_to_be_64(ts.tv_nsec + ts.tv_sec * S2NS);
				stat->sw_rx = rte_cpu_to_be_64(options.s_probe_info.last_sw_rx);
				stat->sw_tx = rte_cpu_to_be_64(options.s_probe_info.last_sw_tx);

				// send the packet
				// NOTE(review): this sends on queue 0, not tinfo->txqid — with
				// multiple worker threads this writes another thread's queue;
				// confirm whether this is intentional.
				if (rte_eth_tx_burst(options.s_portid, 0, &pkt_buf, 1) < 1) {
					rte_exit(EXIT_FAILURE, "failed to send some packets.\n");
				}

				// release flux
				pending_probe = false;

				int expected = SERVER_STATE_PROBE;
				if (!options.s_state.compare_exchange_strong(expected, SERVER_STATE_WAIT)) {
					rte_exit(EXIT_FAILURE, "s_state changed unexpectedly!");
				}
			}
		}
	}

	return 0;
}
// Configures and starts Ethernet port `portid`: one RX and one TX queue per
// worker thread, IPv4/UDP checksum offloads, IEEE 1588 timesync and
// promiscuous mode; records queue ids in options.s_thr_info and attaches the
// rx/tx timestamping callbacks to every queue.
// Requires options.s_thr_info to be fully populated beforehand.
// Returns 0 on success, non-zero (rte error code or -1) on failure.
static int
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf = port_conf_default;
	struct rte_eth_txconf txconf;
	struct rte_eth_rxconf rxconf;
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;

	if(!rte_eth_dev_is_valid_port(portid)) {
		return -1;
	}

	int ret = rte_eth_dev_info_get(portid, &dev_info);
	if (ret != 0) {
		return ret;
	}

	port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	/* Configure the Ethernet device. */
	ret = rte_eth_dev_configure(portid, options.num_threads, options.num_threads, &port_conf);
	if (ret != 0)
		return ret;

	// let the driver clamp descriptor counts to what the hw supports
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	/* Allocate and set up 1 RX queue per thread per Ethernet port. */
	rxconf = dev_info.default_rxconf;
	for (int i = 0; i < options.num_threads; i++) {
		ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
		if (ret < 0)
			return ret;
		options.s_thr_info.at(i)->rxqid = i;
	}

	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	/* Allocate and set up 1 TX queue per thread per Ethernet port. */
	for (int i = 0; i < options.num_threads; i++) {
		ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
		if (ret < 0)
			return ret;
		options.s_thr_info.at(i)->txqid = i;
	}

	ret = rte_eth_dev_start(portid);
	if (ret < 0)
		return ret;

	/* Display the port MAC address. */
	struct rte_ether_addr addr;
	ret = rte_eth_macaddr_get(portid, &addr);
	if (ret != 0)
		return ret;

	// enable hw timestamping (IEEE 1588) for the probe path
	ret = rte_eth_timesync_enable(portid);
	if (ret != 0)
		return ret;

	/* Enable RX in promiscuous mode for the Ethernet device. */
	ret = rte_eth_promiscuous_enable(portid);
	if (ret != 0)
		return ret;

	for (int i = 0; i < options.num_threads; i++) {
		if (rte_eth_add_tx_callback(portid, options.s_thr_info.at(i)->txqid, tx_add_timestamp, NULL) == NULL ||
			rte_eth_add_rx_callback(portid, options.s_thr_info.at(i)->rxqid, rx_add_timestamp, NULL) == NULL) {
			return -1;
		}
	}

	return 0;
}
// Prints command-line usage to stdout.
static void usage()
{
	fprintf(stdout,
	    "Usage:\n"
	    " -v(vv): verbose mode\n"
	    " -h: seek help\n"
	    " -A: cpu mask for worker threads\n");
	fflush(stdout);
}
// Logs the effective configuration at INFO level.
// FIX: cpuset is uint64_t but was printed with %lld (signed long long);
// print it as hex with a matching cast — hex also mirrors how the mask is
// supplied on the command line (-A parses it with strtoull base 16).
static void dump_options()
{
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
			"main: khat configuration:\n"
			" verbosity: +%d\n"
			" thread count: %d\n"
			" thread mask: 0x%llx\n\n",
			ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_DEFAULT,
			options.num_threads,
			(unsigned long long)options.cpuset);
}
// khat server entry point: parses -v/-h/-A after EAL args, picks the first
// available port, builds the mbuf pool and per-thread info, initializes the
// port, then launches locore_main on each configured lcore and waits forever.
int main(int argc, char* argv[])
{
	unsigned int nb_ports;
	struct rte_mempool *mbuf_pool;

	ntr_init();

	if (nm_init() != 0) {
		rte_exit(EXIT_FAILURE, "nm init failed!\n");
	}

	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}

	// EAL consumed its own arguments; getopt below sees only ours
	argc -= ret;
	argv += ret;

	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		while((c = getopt(argc, argv, "hvA:")) != -1) {
			switch (c) {
				case 'v':
					// each -v raises verbosity by one level
					ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
					break;
				case 'h':
					usage();
					rte_exit(EXIT_SUCCESS, "\n");
					break;
				case 'A':
					// hex cpu mask; one worker thread per set bit
					options.cpuset = strtoull(optarg, nullptr, 16);
					options.num_threads = cmask_get_num_cpus(options.cpuset);
					if (options.num_threads == 0) {
						rte_exit(EXIT_FAILURE, "must run at least one thread\n");
					}
					break;
				default:
					usage();
					rte_exit(EXIT_SUCCESS, "unknown argument: %c", c);
					break;
			}
		}
	}

	dump_options();

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0) {
		rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
	}

	uint16_t portid = rte_eth_find_next(0);
	if (portid == RTE_MAX_ETHPORTS) {
		rte_exit(EXIT_FAILURE, "cannot find an available port\n");
	}
	options.s_portid = portid;

	// create a mbuf memory pool on the socket
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT * nb_ports, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(portid));
	if (mbuf_pool == nullptr) {
		rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
	}
	options.s_pkt_mempool = mbuf_pool;

	// init threads: one thread_info per set bit in the cpu mask
	// (must happen before port_init, which fills in the queue ids)
	uint64_t cpuset = options.cpuset;
	for(int i = 0; i < options.num_threads; i++) {
		struct thread_info * tinfo = new thread_info;
		tinfo->tid = i;
		tinfo->lcore_id = cmask_get_next_cpu(&cpuset);
		options.s_thr_info.push_back(tinfo);
	}

	if (port_init(portid, mbuf_pool) != 0) {
		rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
	}

	if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d on socket %d with mac addr %x:%x:%x:%x:%x:%x\n", portid, rte_eth_dev_socket_id(portid),
			options.s_host_mac.addr_bytes[0],
			options.s_host_mac.addr_bytes[1],
			options.s_host_mac.addr_bytes[2],
			options.s_host_mac.addr_bytes[3],
			options.s_host_mac.addr_bytes[4],
			options.s_host_mac.addr_bytes[5]);

	// give the port a moment to settle before launching workers
	usleep(S2US);

	for(int i = 0; i < options.num_threads; i++) {
		struct thread_info * tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: launching thread %d on locore %d\n", tinfo->tid, tinfo->lcore_id);
		if (rte_eal_remote_launch(locore_main, (void *)options.s_thr_info.at(i), tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE, "failed to launch function on locore %d\n", tinfo->lcore_id);
		}
	}

	// workers loop forever, so these waits only return on error
	for(int i = 0; i < options.num_threads; i++) {
		struct thread_info * tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: waiting for locore %d...\n", tinfo->lcore_id);
		if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n", tinfo->lcore_id);
		}
	}

	// shouldn't get here
	return 0;
}

View File

@ -1,95 +1,74 @@
// modified from mutilate
#include "gen.hh"
#include "gen.h"
Generator *
createFacebookKey()
{
return new GEV(30.7984, 8.20449, 0.078688);
Generator* createFacebookKey() { return new GEV(30.7984, 8.20449, 0.078688); }
Generator* createFacebookValue() {
Generator* g = new GPareto(15.0, 214.476, 0.348238);
Discrete* d = new Discrete(g);
d->add(0.00536, 0.0);
d->add(0.00047, 1.0);
d->add(0.17820, 2.0);
d->add(0.09239, 3.0);
d->add(0.00018, 4.0);
d->add(0.02740, 5.0);
d->add(0.00065, 6.0);
d->add(0.00606, 7.0);
d->add(0.00023, 8.0);
d->add(0.00837, 9.0);
d->add(0.00837, 10.0);
d->add(0.08989, 11.0);
d->add(0.00092, 12.0);
d->add(0.00326, 13.0);
d->add(0.01980, 14.0);
return d;
}
Generator *
createFacebookValue()
{
Generator *g = new GPareto(15.0, 214.476, 0.348238);
Generator* createFacebookIA() { return new GPareto(0, 16.0292, 0.154971); }
Discrete *d = new Discrete(g);
d->add(0.00536, 0.0);
d->add(0.00047, 1.0);
d->add(0.17820, 2.0);
d->add(0.09239, 3.0);
d->add(0.00018, 4.0);
d->add(0.02740, 5.0);
d->add(0.00065, 6.0);
d->add(0.00606, 7.0);
d->add(0.00023, 8.0);
d->add(0.00837, 9.0);
d->add(0.00837, 10.0);
d->add(0.08989, 11.0);
d->add(0.00092, 12.0);
d->add(0.00326, 13.0);
d->add(0.01980, 14.0);
Generator* createGenerator(std::string str) {
if (!strcmp(str.c_str(), "fb_key")) return createFacebookKey();
else if (!strcmp(str.c_str(), "fb_value")) return createFacebookValue();
else if (!strcmp(str.c_str(), "fb_ia")) return createFacebookIA();
return d;
}
char *s_copy = new char[str.length() + 1];
strcpy(s_copy, str.c_str());
char *saveptr = NULL;
Generator *
createFacebookIA()
{
return new GPareto(0, 16.0292, 0.154971);
}
if (atoi(s_copy) != 0 || !strcmp(s_copy, "0")) {
double v = atof(s_copy);
delete[] s_copy;
return new Fixed(v);
}
Generator *
createGenerator(std::string str)
{
if (!strcmp(str.c_str(), "fb_key"))
return createFacebookKey();
else if (!strcmp(str.c_str(), "fb_value"))
return createFacebookValue();
else if (!strcmp(str.c_str(), "fb_ia"))
return createFacebookIA();
char *t_ptr = strtok_r(s_copy, ":", &saveptr);
char *a_ptr = strtok_r(NULL, ":", &saveptr);
char *s_copy = new char[str.length() + 1];
strcpy(s_copy, str.c_str());
char *saveptr = NULL;
if (t_ptr == NULL) // || a_ptr == NULL)
DIE("strtok(.., \":\") failed to parse %s", str.c_str());
if (atoi(s_copy) != 0 || !strcmp(s_copy, "0")) {
double v = atof(s_copy);
delete[] s_copy;
return new Fixed(v);
}
saveptr = NULL;
char *s1 = strtok_r(a_ptr, ",", &saveptr);
char *s2 = strtok_r(NULL, ",", &saveptr);
char *s3 = strtok_r(NULL, ",", &saveptr);
char *t_ptr = strtok_r(s_copy, ":", &saveptr);
char *a_ptr = strtok_r(NULL, ":", &saveptr);
double a1 = s1 ? atof(s1) : 0.0;
double a2 = s2 ? atof(s2) : 0.0;
double a3 = s3 ? atof(s3) : 0.0;
if (t_ptr == NULL) // || a_ptr == NULL)
DIE("strtok(.., \":\") failed to parse %s", str.c_str());
delete[] s_copy;
saveptr = NULL;
char *s1 = strtok_r(a_ptr, ",", &saveptr);
char *s2 = strtok_r(NULL, ",", &saveptr);
char *s3 = strtok_r(NULL, ",", &saveptr);
if (strcasestr(str.c_str(), "fixed")) return new Fixed(a1);
else if (strcasestr(str.c_str(), "normal")) return new Normal(a1, a2);
else if (strcasestr(str.c_str(), "exponential")) return new Exponential(a1);
else if (strcasestr(str.c_str(), "pareto")) return new GPareto(a1, a2, a3);
else if (strcasestr(str.c_str(), "gev")) return new GEV(a1, a2, a3);
else if (strcasestr(str.c_str(), "uniform")) return new Uniform(a1);
double a1 = s1 ? atof(s1) : 0.0;
double a2 = s2 ? atof(s2) : 0.0;
double a3 = s3 ? atof(s3) : 0.0;
DIE("Unable to create Generator '%s'", str.c_str());
delete[] s_copy;
if (strcasestr(str.c_str(), "fixed"))
return new Fixed(a1);
else if (strcasestr(str.c_str(), "normal"))
return new Normal(a1, a2);
else if (strcasestr(str.c_str(), "exponential"))
return new Exponential(a1);
else if (strcasestr(str.c_str(), "pareto"))
return new GPareto(a1, a2, a3);
else if (strcasestr(str.c_str(), "gev"))
return new GEV(a1, a2, a3);
else if (strcasestr(str.c_str(), "uniform"))
return new Uniform(a1);
DIE("Unable to create Generator '%s'", str.c_str());
return NULL;
return NULL;
}

View File

@ -1,276 +0,0 @@
#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <sys/endian.h>
#include <sys/thr.h>
#include <pthread.h>
#include <pthread_np.h>
#include <topo.h>
#include <unistd.h>
#include "nms.h"
#include "gen.hh"
#include <atomic>
/*
 * Per-thread worker body for memload_generator.
 *
 * Allocates (or adopts, when shared_buffer is set) a source and destination
 * buffer, reports init status through tinfo->init_status, then spins on the
 * shared state machine (*tinfo->state) copying transaction_size chunks at the
 * rate dictated by tinfo->ia_gen until STATE_END is observed.
 *
 * init_status: 0 = in progress, 1 = ready, -1 = allocation failed.
 * Returns nullptr always (pthread signature).
 */
void *
memload_generator::worker_thrd(void *_tinfo)
{
	auto *tinfo = (struct thread_info *)_tinfo;
	void *from_buffer, *to_buffer, *tmp;

	if (tinfo->opts->shared_buffer) {
		/* Buffers were allocated once by the constructor. */
		from_buffer = tinfo->from_buffer;
		to_buffer = tinfo->to_buffer;
	} else {
		/* Private buffers: source on this core's NUMA domain,
		 * destination on the target domain. */
		if (tinfo->opts->verbose) {
			fprintf(stdout,
			    "memload_generator <thread %d>: allocating fbuf %lu bytes on domain %d...\n",
			    tinfo->tid, tinfo->opts->buffer_size,
			    topo_core_to_numa(tinfo->coreid));
		}
		from_buffer = nms_alloc_static(topo_core_to_numa(
						   tinfo->coreid),
		    tinfo->opts->buffer_size);
		if (tinfo->opts->verbose) {
			fprintf(stdout,
			    "memload_generator <thread %d>: allocating tbuf %lu bytes on domain %d...\n",
			    tinfo->tid, tinfo->opts->buffer_size, tinfo->target_dom);
		}
		to_buffer = nms_alloc_static(tinfo->target_dom,
		    tinfo->opts->buffer_size);
	}

	if (from_buffer == nullptr || to_buffer == nullptr) {
		if (tinfo->opts->verbose) {
			fprintf(stderr,
			    "memload_generator <thread %d>: failed to allocate memory\n",
			    tinfo->tid);
		}
		/* Tell the constructor we failed so it can abort startup. */
		tinfo->init_status.store(-1);
		return nullptr;
	}

	/* "pull" mode copies from the remote domain into the local one:
	 * just swap source and destination. */
	if (tinfo->pull) {
		tmp = from_buffer;
		from_buffer = to_buffer;
		to_buffer = tmp;
	}

	// wait for other threads to init
	if (tinfo->opts->verbose) {
		fprintf(stdout, "memload_generator <thread %d, pull %d>: running...\n", tinfo->tid, tinfo->pull);
	}
	tinfo->init_status.store(1);

	uint64_t next_ts = topo_uptime_ns();
	size_t cur_offset = 0;
	uint64_t cur_ts = 0;
	while (true) {
		switch (tinfo->state->load()) {
		case STATE_RUN:
			cur_ts = topo_uptime_ns();
			/* Issue one transaction when its scheduled time
			 * arrives; wrap the offset at buffer end. */
			if (cur_ts >= next_ts) {
				if (cur_offset + tinfo->opts->transaction_size >
				    tinfo->opts->buffer_size) {
					cur_offset = 0;
				}
				// for (uint i = 0; i < tinfo->opts->transaction_size; i++) {
				//	((char *)to_buffer)[cur_offset + i] = ((char *)from_buffer)[cur_offset + i];
				// }
				memcpy((char *)to_buffer + cur_offset,
				    (char *)from_buffer + cur_offset,
				    tinfo->opts->transaction_size);
				tinfo->num_trans.fetch_add(1);
				/* set_transactions() requested a rate change:
				 * restart pacing from "now". */
				if (tinfo->reset_ts.load(
					std::memory_order_relaxed)) {
					tinfo->reset_ts.store(false,
					    std::memory_order_relaxed);
					next_ts = cur_ts;
				}
				next_ts += tinfo->ia_gen->generate() *
				    (double)S2NS;
				cur_offset += tinfo->opts->transaction_size;
			}
			break;
		case STATE_END:
			goto end;
		case STATE_RDY:
			/* Paused: keep moving the deadline so we don't burst
			 * when resumed. */
			next_ts = topo_uptime_ns();
			break;
		case STATE_INIT:
		default:
			break;
		}
	}
end:
	if (tinfo->opts->verbose) {
		fprintf(stdout, "memload_generator <thread %d>: exiting...\n",
		    tinfo->tid);
	}
	/* Private buffers are owned by this thread; shared ones are freed by
	 * the destructor. */
	if (!tinfo->opts->shared_buffer) {
		nms_free_static(from_buffer, tinfo->opts->buffer_size);
		nms_free_static(to_buffer, tinfo->opts->buffer_size);
	}
	return nullptr;
}
/*
 * Spawn one worker per core in `threads`, each copying between its local NUMA
 * domain and the (single) domain in `target_domain`.  Cores set in `modes`
 * run in "pull" direction.  `*success` reports whether startup completed.
 *
 * Fixes vs. previous revision:
 *  - the final verbose log printed `success ? 1 : 0` (the pointer, always 1)
 *    instead of `*success`;
 *  - `info` leaked when createGenerator() failed;
 *  - pthread_attr_t was never destroyed.
 */
memload_generator::memload_generator(cpuset_t *threads, cpuset_t *modes,
    cpuset_t *target_domain, struct memload_generator_options *opt,
    bool *success)
{
	*success = false;
	state.store(STATE_INIT);
	std::memcpy(&this->opts, opt, sizeof(memload_generator_options));
	int nextcore = CPU_FFS(threads) - 1;
	int target_domain_id = CPU_FFS(target_domain) - 1;
	int num_cores = CPU_COUNT(threads);
	if (target_domain_id < 0 || num_cores == 0) {
		return;
	}
	/* Divide the requested aggregate rate evenly across workers. */
	double thread_tps = (double)opt->trans_per_second / (double)num_cores;
	void *local_buffer = nullptr;
	void *target_buffer = nullptr;
	int tid = 0;
	if (opts.shared_buffer) {
		/* One buffer pair shared by all workers: local side on the
		 * first worker core's domain, remote side on the target. */
		local_buffer = nms_alloc_static(topo_core_to_numa(nextcore),
		    opt->buffer_size);
		target_buffer = nms_alloc_static(target_domain_id,
		    opt->buffer_size);
		if (local_buffer == nullptr || target_buffer == nullptr) {
			goto end;
		}
	}
	while (nextcore != -1) {
		auto info = new struct thread_info;
		cpuset_t cpuset;
		pthread_attr_t attr;
		info->ia_gen = createGenerator(opts.ia_dist);
		if (info->ia_gen == nullptr) {
			delete info; /* don't leak on bad ia_dist */
			goto end;
		}
		info->ia_gen->set_lambda(thread_tps);
		info->init_status.store(0);
		info->state = &this->state;
		info->reset_ts.store(false, std::memory_order_relaxed);
		info->num_trans.store(0);
		info->opts = &this->opts;
		info->tid = tid;
		info->coreid = nextcore;
		info->target_dom = target_domain_id;
		info->from_buffer = local_buffer;
		info->to_buffer = target_buffer;
		info->pull = CPU_ISSET(nextcore, modes);

		/* Pin the worker to its core before it starts. */
		CPU_ZERO(&cpuset);
		CPU_SET(nextcore, &cpuset);
		pthread_attr_init(&attr);
		pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &cpuset);
		pthread_create(&info->pthr, &attr, worker_thrd, info);
		pthread_attr_destroy(&attr);

		if (opts.verbose) {
			fprintf(stdout,
			    "memload_generator: created thread %d on core %d target domain %d\n",
			    tid, nextcore, target_domain_id);
		}
		thr_infos.push_back(info);
		CPU_CLR(nextcore, threads);
		nextcore = CPU_FFS(threads) - 1;
		tid++;
	}
	/* Block until every worker reports ready (1) or failed (-1). */
	for (auto tinfo : thr_infos) {
		int status;
		while ((status = tinfo->init_status.load()) != 1) {
			if (status == -1) {
				state.store(STATE_END);
				goto end;
			}
		}
	}
	state.store(STATE_RDY);
	*success = true;
end:
	if (opts.verbose) {
		fprintf(stdout,
		    "memload_generator: exiting constructor. Success: %d...\n",
		    *success ? 1 : 0);
	}
}
/*
 * Transition RDY -> RUN.  Returns false if the generator is not ready.
 */
bool
memload_generator::start()
{
	if (this->state.load() != STATE_RDY)
		return false;
	this->state.store(memload_generator::STATE_RUN);
	return true;
}
/*
 * Transition RUN -> RDY (pause).  Returns false if not currently running.
 */
bool
memload_generator::stop()
{
	if (this->state.load() != STATE_RUN)
		return false;
	this->state.store(memload_generator::STATE_RDY);
	return true;
}
/*
 * Re-target the aggregate transaction rate, split evenly across workers.
 * Each worker is asked (via reset_ts) to restart its pacing clock.
 * Fails while the generator is uninitialized or already shut down.
 */
bool
memload_generator::set_transactions(uint64_t tps)
{
	if (this->state.load() == STATE_END ||
	    this->state.load() == STATE_INIT)
		return false;

	const double per_worker = (double)tps / (double)thr_infos.size();
	for (auto *worker : thr_infos) {
		worker->ia_gen->set_lambda(per_worker);
		worker->reset_ts.store(true, std::memory_order_relaxed);
	}
	return true;
}
/*
 * Sum the per-worker completed-transaction counters.
 */
uint64_t
memload_generator::get_transactions()
{
	uint64_t total = 0;
	for (auto *worker : thr_infos) {
		total += worker->num_trans.load();
	}
	return total;
}
/*
 * Signal STATE_END, join every worker, and free the shared buffer pair.
 * Per-worker private buffers are freed by the workers themselves.
 *
 * Fix: buf1/buf2 were read uninitialized (UB) when thr_infos was empty and
 * shared_buffer was set; initialize to nullptr and guard the free.
 */
memload_generator::~memload_generator()
{
	void *buf1 = nullptr;
	void *buf2 = nullptr;
	this->state.store(STATE_END);
	for (auto i : thr_infos) {
		// XXX: nms_free regions
		pthread_join(i->pthr, NULL);
		/* With shared_buffer every worker holds the same pair, so the
		 * last iteration leaves the shared pointers behind. */
		buf1 = i->from_buffer;
		buf2 = i->to_buffer;
		delete i;
	}
	if (opts.shared_buffer && buf1 != nullptr && buf2 != nullptr) {
		nms_free_static(buf1, opts.buffer_size);
		nms_free_static(buf2, opts.buffer_size);
	}
}

127
libnm/nm.cc Normal file
View File

@ -0,0 +1,127 @@
#include <hwloc.h>
#include <vector>
#include <algorithm>
#include "nm.h"
/*
 * One node of the machine-topology tree built by nm_init():
 * NUMA node -> "cpu" (hwloc core) -> "core" (hwloc PU).
 */
struct nm_obj {
	int level;			  // one of the NM_LEVEL_* constants
	int id;				  // hwloc logical_index at this level
	struct nm_obj *parent;		  // enclosing object; nullptr for NUMA nodes
	std::vector<struct nm_obj *> children; // objects one level below
};
/* Orders topology objects by ascending logical id (for std::sort). */
static bool
nm_obj_comparator(struct nm_obj *lhs, struct nm_obj *rhs)
{
	return lhs->id < rhs->id;
}
// Flat, id-sorted views of the topology, populated once by nm_init().
static std::vector<struct nm_obj *> nodes; // NUMA nodes
static std::vector<struct nm_obj *> cores; // hwloc PUs (hardware threads)
static std::vector<struct nm_obj *> cpus;  // hwloc cores

// Accessors hand out pointers to the module-level vectors; callers must not
// modify or free them.
std::vector<struct nm_obj *> * nm_get_nodes()
{
	return &nodes;
}
std::vector<struct nm_obj *> * nm_get_cpus()
{
	return &cpus;
}
std::vector<struct nm_obj *> * nm_get_cores()
{
	return &cores;
}
/*
 * Walk up the hwloc tree from `obj` to the nearest ancestor (or obj itself)
 * of the requested type; nullptr when no such ancestor exists.
 */
hwloc_obj_t get_parent_type(hwloc_obj_t obj, hwloc_obj_type_t type)
{
	for (; obj != nullptr; obj = obj->parent) {
		if (obj->type == type) {
			return obj;
		}
	}
	return nullptr;
}
// 0 on success
// -1 on error
int nm_init()
{
int ret;
hwloc_topology * topo;
if ((ret = hwloc_topology_init(&topo)) != 0) {
return ret;
}
if ((ret = hwloc_topology_load(topo)) != 0)
return ret;
// populate numa nodes
hwloc_obj_t obj = nullptr;
while(1) {
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PACKAGE, obj);
if (obj == nullptr) {
break;
}
struct nm_obj * each = new struct nm_obj;
each->id = obj->logical_index;
each->level = NM_LEVEL_NUMA;
each->parent = nullptr;
nodes.push_back(each);
printf("libnm: identified NUMA node %d\n", each->id);
}
std::sort(nodes.begin(), nodes.end(), nm_obj_comparator);
// populate cpus
obj = nullptr;
while(1) {
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_CORE, obj);
if (obj == nullptr) {
break;
}
struct nm_obj * each = new struct nm_obj;
each->id = obj->logical_index;
each->level = NM_LEVEL_CPU;
hwloc_obj_t parent = get_parent_type(obj, HWLOC_OBJ_PACKAGE);
if (parent == nullptr) {
return -1;
}
// XXX: this faults if the OS decides to be stupid
each->parent = nodes.at(parent->logical_index);
each->parent->children.push_back(each);
cpus.push_back(each);
printf("libnm: identified CPU %d on NUMA node %d\n", each->id, each->parent->id);
}
std::sort(cpus.begin(), cpus.end(), nm_obj_comparator);
// populate cores
obj = nullptr;
while(1) {
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PU, obj);
if (obj == nullptr) {
break;
}
struct nm_obj * each = new struct nm_obj;
each->id = obj->logical_index;
each->level = NM_LEVEL_CORE;
hwloc_obj_t parent = get_parent_type(obj, HWLOC_OBJ_CORE);
if (parent == nullptr) {
return -1;
}
// XXX: this faults if the OS decides to be stupid
each->parent = cpus.at(parent->logical_index);
each->parent->children.push_back(each);
cores.push_back(each);
printf("libnm: identified core %d on CPU %d, NUMA node %d\n", each->id, each->parent->id, each->parent->parent->id);
}
std::sort(cores.begin(), cores.end(), nm_obj_comparator);
return ret;
}

View File

@ -1,205 +0,0 @@
#include <pthread.h>
#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <sys/thr.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <errno.h>
#include <stdatomic.h>
#include <string.h>
#include <assert.h>
#include <nms.h>
#define MAX_NUMA_DOMAINS (64)	/* max NUMA domains tracked by the allocator */
#define MAX_REGIONS (64)	/* max backing regions per domain */
#define REGION_SIZE (1024 * 1024 * 1024) /* each region is a 1 GiB mapping */
#define PAGE_SIZE (4096)	/* alignment granularity for sub-allocations */

/* One contiguous backing mapping, consumed as a bump allocator. */
struct nms_region {
	uintptr_t start_addr;	/* base address of the mapping */
	size_t size;		/* total bytes in the mapping */
	size_t occupied;	/* bytes handed out so far (page-rounded) */
};

/* Global allocator state: per-domain region arrays guarded by alloc_lock. */
struct nms_desc {
	// alloc
	pthread_mutex_t alloc_lock;
	struct nms_region regions[MAX_NUMA_DOMAINS][MAX_REGIONS];
	int region_sz[MAX_NUMA_DOMAINS]; /* live region count per domain */
};

/* 0 = uninitialized, 2 = initialization in progress, 1 = ready
 * (see the compare-exchange handshake in nms_init()). */
static _Atomic(int) initialized = 0;
static struct nms_desc g_desc;
void
nms_free_static(void * buf, size_t sz)
{
munmap(buf, sz);
return;
}
/*
 * Map `sz` bytes of anonymous memory whose pages are placed on NUMA domain
 * `node_id`, by temporarily switching the calling thread's domain policy,
 * prefaulting every page, then restoring the original policy.
 * Returns NULL on any failure.
 *
 * Fixes vs. previous revision:
 *  - `sum` was read before initialization (UB);
 *  - the failure path unmapped REGION_SIZE bytes instead of `sz`;
 *  - the domain policy was not restored when mmap failed.
 */
void *
nms_alloc_static(int node_id, size_t sz)
{
	long tid;
	domainset_t orig_dom;
	int orig_policy;
	void *region;

	thr_self(&tid);
	DOMAINSET_ZERO(&orig_dom);

	// save existing thread's allocation strategy
	int ret = cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig_dom), &orig_dom, &orig_policy);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_getdomain failed with %d\n", errno);
		return NULL;
	}

	/* Force all page placement onto the requested domain. */
	domainset_t tmp_domain;
	DOMAINSET_ZERO(&tmp_domain);
	DOMAINSET_SET(node_id, &tmp_domain);
	ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(tmp_domain), &tmp_domain, DOMAINSET_POLICY_ROUNDROBIN);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
		return NULL;
	}

	if ((region = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		 MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE |
		     MAP_PREFAULT_READ,
		 -1, 0)) == MAP_FAILED) {
		fprintf(stderr, "libnms: mmap failed with %d\n", errno);
		/* restore the saved policy before bailing out */
		cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
		    sizeof(orig_dom), &orig_dom, orig_policy);
		return NULL;
	}

	// touch the pages to prefault the pages
	int sum = 0;
	for (size_t i = 0; i < sz; i++) {
		sum += *(uint8_t *)((char *)region + i);
		*(uint8_t *)((char *)region + i) = i;
	}
	(void)sum; /* the reads exist only to force faults */

	// restore existing thread's allocation strategy
	ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig_dom), &orig_dom, orig_policy);
	if (ret != 0) {
		fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
		munmap(region, sz); /* was REGION_SIZE: wrong size */
		return NULL;
	}
	return region;
}
/*
 * Zero a descriptor and set up its allocation lock.  `verbose` is accepted
 * for interface symmetry but currently unused.  Always returns 0.
 */
static int
nms_desc_init(struct nms_desc *desc, int verbose)
{
	(void)verbose;
	memset(desc, 0, sizeof(*desc));
	pthread_mutex_init(&desc->alloc_lock, NULL);
	return 0;
}
/*
 * Bump-allocate `size` bytes from a region, rounding the watermark up to the
 * next page boundary afterwards.  Returns NULL when the region is full.
 */
static void *
nms_region_malloc(struct nms_region *region, size_t size)
{
	if (region->occupied + size > region->size)
		return NULL;

	void *p = (void *)(region->start_addr + region->occupied);
	region->occupied += size;
	region->occupied = (region->occupied + PAGE_SIZE - 1) &
	    ~(PAGE_SIZE - 1);
	return p;
}
static int
nms_desc_add_region(struct nms_desc * desc, int nodeid, size_t size)
{
void * ret;
int idx;
ret = nms_alloc_static(nodeid, REGION_SIZE);
if (ret == NULL) {
fprintf(stderr, "libnms: failed to allocate region on node %d\n", nodeid);
return ENOMEM;
}
desc->region_sz[nodeid]++;
idx = desc->region_sz[nodeid] - 1;
desc->regions[nodeid][idx].start_addr = (uintptr_t)ret;
desc->regions[nodeid][idx].occupied = 0;
desc->regions[nodeid][idx].size = REGION_SIZE;
return 0;
}
/*
 * Allocate `size` bytes on NUMA domain `nodeid` under the descriptor lock.
 * Tries the newest region first; on exhaustion maps a fresh region and
 * retries.  Returns NULL if size > REGION_SIZE or a new region cannot be
 * mapped.  Thread-safe via desc->alloc_lock.
 */
static void *
nms_desc_malloc(struct nms_desc *desc, unsigned int nodeid, size_t size)
{
	void *ret = NULL;
	int idx;
	int new_region = 0; /* NOTE(review): unused — candidate for removal */
	if (size > REGION_SIZE) {
		return NULL;
	}
	pthread_mutex_lock(&desc->alloc_lock);
retry:
	/* Only the most recent region can have free space left. */
	if (desc->region_sz[nodeid] > 0) {
		idx = desc->region_sz[nodeid] - 1;
		ret = nms_region_malloc(&desc->regions[nodeid][idx], size);
	}
	if (ret == NULL) {
		// we need a new region
		if (nms_desc_add_region(desc, nodeid, REGION_SIZE) != 0) {
			pthread_mutex_unlock(&desc->alloc_lock);
			return NULL;
		}
		fprintf(stdout, "libnms: malloc request of size %zu -> allocated new region on node %d\n", size, nodeid);
		goto retry;
	}
	pthread_mutex_unlock(&desc->alloc_lock);
	return ret;
}
/*
 * Intentionally a no-op: bump-allocated memory is never returned; regions
 * live until process exit.
 */
static void
nms_desc_free(struct nms_desc *desc __attribute__((unused)),
    unsigned int node __attribute__((unused)),
    void *addr __attribute__((unused)))
{
	// dummy function
}
/*
 * One-time global initialization with a CAS handshake: the winner moves
 * `initialized` 0 -> 2, sets up g_desc, then publishes 1; losers spin until
 * the state reaches 1.  Always returns 0.
 */
int
nms_init(int verbose)
{
	int expected = 0;
	if (!atomic_compare_exchange_strong(&initialized, &expected, 2)) {
		/* Lost the race: wait for the winner to finish. */
		while (atomic_load(&initialized) != 1) {
		}
		fprintf(stdout,"libnms: already initialized.\n");
		return 0;
	}
	nms_desc_init(&g_desc, verbose);
	atomic_store(&initialized, 1);
	return 0;
}
/*
 * Public allocation entry point; nms_init() must have completed first.
 */
void *
nms_malloc(int nodeid, size_t sz)
{
	assert(atomic_load(&initialized) == 1);
	void *p = nms_desc_malloc(&g_desc, nodeid, sz);
	return p;
}
/*
 * Public free entry point; forwards to nms_desc_free (currently a no-op).
 */
void
nms_free(int nodeid, void *addr)
{
	assert(atomic_load(&initialized) == 1);
	nms_desc_free(&g_desc, nodeid, addr);
}

View File

@ -1,46 +1,43 @@
#include "ntr.h"
static int ntr_log_levels[NTR_DEP_MAX] = { NTR_LEVEL_DEFAULT };
static FILE *ntr_out;
static int ntr_log_levels[NTR_DEP_MAX] = {NTR_LEVEL_DEFAULT};
static FILE * ntr_out;
void
ntr_init()
void ntr_init()
{
ntr_out = stdout;
ntr_out = stdout;
}
void
ntr(int dep, int level, const char *fmt, ...)
void ntr(int dep, int level, const char * fmt, ...)
{
va_list vl;
va_start(vl, fmt);
if (dep < NTR_DEP_MAX && level <= ntr_log_levels[dep]) {
vfprintf(ntr_out, fmt, vl);
}
va_end(vl);
va_list vl;
va_start(vl, fmt);
if (dep < NTR_DEP_MAX && level <= ntr_log_levels[dep]) {
vfprintf(ntr_out, fmt, vl);
}
va_end(vl);
}
void
ntr_set_level(int dep, int level)
void ntr_set_level(int dep, int level)
{
if (dep < NTR_DEP_MAX) {
ntr_log_levels[dep] = level;
}
if (dep < NTR_DEP_MAX) {
ntr_log_levels[dep] = level;
}
}
void
ntr_set_output(FILE *f)
void ntr_set_output(FILE * f)
{
if (f != NULL) {
ntr_out = f;
}
if (f != NULL) {
ntr_out = f;
}
}
int
ntr_get_level(int dep)
int ntr_get_level(int dep)
{
if (dep < NTR_DEP_MAX) {
return ntr_log_levels[dep];
}
return 0;
if (dep < NTR_DEP_MAX) {
return ntr_log_levels[dep];
}
return 0;
}

View File

@ -1,989 +0,0 @@
#include <atomic>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <random>
#include <vector>
#include <topo.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_config.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <unistd.h>
#include "ntr.h"
#include "gen.hh"
#include "net/netsup.hh"
#include "net/pkt.hh"
#include "nms.h"
constexpr static unsigned int BURST_SIZE = 32;	 // max packets per rx burst
constexpr static unsigned int MAX_SLAVES = 32;	 // max coordinated clients
constexpr static unsigned int SLAVES_MAX_WAIT_MS = 1000; // slave-reply timeout

/*
 * All timestamps collected for one probe epoch: client-side NIC (hw) and
 * software (sw) timestamps in both directions, plus the server-side values
 * reported back in the PKT_TYPE_STAT reply.
 */
struct datapt {
	uint32_t epoch;	     // probe sequence number
	uint32_t valid;	     // snapshot of s_record at send time (nonzero = keep)
	uint64_t clt_hw_tx;  // client NIC tx timestamp (0 if hw ts disabled)
	uint64_t clt_sw_tx;  // client sw timestamp at tx callback
	uint64_t clt_hw_rx;  // client NIC rx timestamp (0 if hw ts disabled)
	uint64_t clt_sw_rx;  // client sw timestamp at rx callback
	uint64_t srv_hw_tx;  // server-reported NIC tx timestamp
	uint64_t srv_sw_tx;  // server-reported sw tx timestamp
	uint64_t srv_hw_rx;  // server-reported NIC rx timestamp
	uint64_t srv_sw_rx;  // server-reported sw rx timestamp
};
// Probe state machine driven by pkt_loop():
constexpr static uint32_t STATE_WAIT = 0; // waiting for sending
constexpr static uint32_t STATE_SENT = 1; // we sent a packet
constexpr static uint32_t STATE_COMPLETE = 2; // we received everything
constexpr static uint32_t STATE_PKTLOSS = 3; // last packet sent was lost

/*
 * All command-line parameters and mutable run-time state, shared between the
 * main thread, the polling lcore and the rx/tx timestamp callbacks.
 */
struct options_t {
	// parameters
	unsigned int run_time { 5 };	  // measured seconds
	unsigned int warmup_time { 3 };	  // unrecorded seconds before measuring
	char output[256] = "output.txt";  // result file path
	char ia_gen_str[256] = "fixed";	  // inter-arrival distribution spec
	unsigned int target_qps { 0 };
	unsigned int master_mode { 0 };	  // 1 = coordinate slave clients
	struct net_spec server_spec { };
	cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core
	std::vector<struct net_spec *> slaves;
	uint32_t pkt_loss_failure_threshold { 0 }; // consecutive losses before abort
	uint32_t pkt_loss_time_ms { UINT32_MAX };  // per-probe reply timeout
	int portid { 0 };

	// states
	struct net_spec s_host_spec { };
	struct conn_spec s_host_conn {
		.src = &s_host_spec, .dst = &server_spec, .dst_port = POU_PORT
	};
	unsigned int s_rxqid { 0 };
	unsigned int s_txqid { 0 };
	unsigned int s_socketid { 0 };
	// for qps calculation
	std::atomic<uint32_t> s_recved_pkts { 0 };
	std::atomic<uint32_t> s_pkt_loss { 0 };
	std::atomic<uint64_t> s_start_time { 0 };
	std::atomic<uint64_t> s_end_time { 0 };
	std::atomic<uint32_t> s_slave_qps { 0 };     // aggregated from FIN_ACKs
	std::atomic<uint32_t> s_slave_recved { 0 };
	std::atomic<uint32_t> s_slave_loss { 0 };
	uint32_t s_state { STATE_WAIT };	     // probe state machine
	bool s_hwtimestamp { true };		     // use NIC timestamps
	Generator *s_iagen { nullptr };		     // probe inter-arrival generator
	std::vector<struct datapt *> s_data;	     // completed measurements
	struct datapt *s_last_datapt { nullptr };    // in-flight probe's record
	uint32_t s_epoch { 0 };			     // next probe sequence number
	std::atomic<bool> s_stop { false };	     // request pkt_loop exit
	std::atomic<uint32_t> s_record { 0 };	     // nonzero while measuring
};

static struct options_t options;
/*
 * DPDK rx callback: for the in-flight probe's PROBE_RESP, record the client
 * receive timestamps (NIC timestamp when s_hwtimestamp, else sw only) into
 * options.s_last_datapt.  All other packets pass through untouched.
 * Never drops packets; always returns nb_pkts.
 */
static uint16_t
rx_add_timestamp(uint16_t port, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused,
    void *_ __rte_unused)
{
	uint64_t now = topo_uptime_ns();
	struct pkt_hdr *pkt_data;
	struct timespec ts { };
	int ret;

	/* Only a probe in flight can be timestamped. */
	if (options.s_state != STATE_SENT) {
		return nb_pkts;
	}

	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i],
		    &options.s_host_spec.mac_addr);
		if (pkt_data == nullptr) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "rx_add_timestamp: ignoring invalid packet 0x%p.\n",
			    (void *)pkts[i]);
			continue;
		}
		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
			uint32_t epoch = rte_be_to_cpu_32(
			    ((struct pkt_payload_epoch *)pkt_data->payload)
				->epoch);
			/* Only tag the response matching the probe we sent. */
			if (options.s_last_datapt != nullptr &&
			    options.s_last_datapt->epoch == epoch) {
				if (options.s_hwtimestamp) {
					if ((ret = rte_eth_timesync_read_rx_timestamp(
						 port, &ts, pkts[i]->timesync & 0x3)) ==
					    0) {
						// has hw rx timestamp
						options.s_last_datapt->clt_hw_rx =
						    ts.tv_sec * S2NS + ts.tv_nsec;
						options.s_last_datapt->clt_sw_rx = now;
						ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
						    "rx_add_timestamp: tagged packet %p with sw: %lu hw: %lu.\n",
						    (void *)pkts[i], now,
						    options.s_last_datapt->clt_hw_rx);
					} else {
						/* hw timestamping was requested
						 * but the NIC had no sample:
						 * treat as fatal. */
						rte_exit(EXIT_FAILURE,
						    "rx_add_timestamp: packet %p not tagged - hw ts not "
						    "available - %d.\n",
						    (void *)pkts[i], ret);
					}
				} else {
					options.s_last_datapt->clt_sw_rx = now;
					options.s_last_datapt->clt_hw_rx = 0;
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "rx_add_timestamp: tagged packet %p with sw: %lu hw: (disabled).\n",
					    (void *)pkts[i], now);
				}
			} else {
				ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
				    "rx_add_timestamp: packet %p epoch %d != last epoch %d.\n",
				    (void *)pkts[i], epoch,
				    options.s_last_datapt->epoch);
			}
		} else {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "rx_add_timestamp: packet %p not tagged - type %d.\n",
			    (void *)pkts[i], rte_be_to_cpu_16(pkt_data->type));
		}
	}
	return nb_pkts;
}
/*
 * DPDK tx callback: record the client software tx timestamp for outgoing
 * PROBE packets into options.s_last_datapt.  Aborts if the probe's epoch
 * does not match the record set up by pkt_loop() before sending.
 * Never drops packets; always returns nb_pkts.
 *
 * Fixes vs. previous revision:
 *  - the rte_exit() format arguments dereferenced options.s_last_datapt even
 *    when the guard had just established it could be nullptr;
 *  - the "not tagged" debug log printed the raw big-endian type (the rx
 *    callback converts it; now both do).
 */
static uint16_t
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
{
	uint64_t now = topo_uptime_ns();
	struct pkt_hdr *pkt_data;
	// if (options.s_state != STATE_SENT) {
	//	return nb_pkts;
	// }
	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i],
		    &options.s_host_spec.mac_addr);
		if (pkt_data == nullptr) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: ignoring invalid packet 0x%p.\n",
			    (void *)pkts[i]);
			continue;
		}
		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
			uint32_t epoch = rte_be_to_cpu_32(
			    ((struct pkt_payload_epoch *)pkt_data->payload)
				->epoch);
			if (options.s_last_datapt == nullptr ||
			    epoch != options.s_last_datapt->epoch) {
				/* UINT32_MAX (-1) marks "no probe record". */
				uint32_t last_epoch =
				    options.s_last_datapt == nullptr ?
				    UINT32_MAX :
				    options.s_last_datapt->epoch;
				rte_exit(EXIT_FAILURE,
				    "tx_add_timestamp: packet epoch %d != last epoch %d\n",
				    epoch, last_epoch);
			}
			options.s_last_datapt->clt_sw_tx = now;
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: tagged packet %p with sw: %lu.\n",
			    (void *)pkts[i], now);
		} else {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: packet %p not tagged - type %d.\n",
			    (void *)pkts[i], rte_be_to_cpu_16(pkt_data->type));
		}
	}
	return nb_pkts;
}
// Broadcast one control packet of `type` to every registered slave client
// over the RAT port; rte_exit()s (no return) on allocation or send failure.
static void
send_all_slaves(uint16_t type)
{
	struct rte_mbuf *tx_bufs[MAX_SLAVES];
	//struct rte_eth_stats stats;
	struct conn_spec cspec;
	cspec.src = &options.s_host_spec;
	cspec.dst_port = DEFAULT_RAT_PORT;
	cspec.src_port = DEFAULT_RAT_PORT;

	// send all clients SYNC
	for (unsigned int i = 0; i < options.slaves.size(); i++) {
		struct pkt_hdr *hdr;
		cspec.dst = options.slaves.at(i);
		if (alloc_pkt_hdr(mempool_get(options.s_socketid), type, &cspec, 0,
			&tx_bufs[i], &hdr) != 0) {
			rte_exit(EXIT_FAILURE, "failed to alloc packet\n");
		}
	}
	// if (rte_eth_stats_get(options.portid, &stats) != 0 ) {
	//	rte_exit(EXIT_FAILURE, "failed!");
	// }
	// printf("send_all_slaves: ipackets %lu, opackets %lu, ierrors %lu, oerrors %lu\n", stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors);
	/* A partial burst is fatal: every slave must get the message. */
	if (rte_eth_tx_burst(options.portid, options.s_txqid, tx_bufs,
		options.slaves.size()) != options.slaves.size()) {
		rte_exit(EXIT_FAILURE, "failed to send some packets\n");
	}
}
// sizeof mbuf must >= MAX_SLAVES
// this function fills up to #slave
//
// Poll rx until exactly one packet of `etype` has been received from every
// registered slave (duplicates and unknown senders are dropped).  When `out`
// is non-null the matching mbufs are handed to the caller unfreed (caller
// frees); otherwise they are freed here.  rte_exit()s after
// SLAVES_MAX_WAIT_MS without completion.
static void
wait_for_slaves(uint16_t etype, struct rte_mbuf **out)
{
	struct rte_mbuf *tx_bufs[MAX_SLAVES];
	bool stop = false;
	const uint64_t start = topo_uptime_ns();
	std::vector<struct rte_ether_addr *> recved;
	uint32_t tot = 0;

	while (!stop) {
		uint64_t now = topo_uptime_ns();
		const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
		    options.s_rxqid, tx_bufs, MAX_SLAVES);
		if (nb_rx > 0) {
			for (unsigned int i = 0; i < nb_rx; i++) {
				struct pkt_hdr *each = check_valid_packet(
				    tx_bufs[i], &options.s_host_spec.mac_addr);
				uint16_t type;
				if (each == nullptr) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "wait_for_slaves: ignoring invalid packet %p.\n",
					    (void *)tx_bufs[i]);
					goto end_loop;
				}
				type = rte_be_to_cpu_16(each->type);
				if (type == etype) {
					bool invalid = true;
					// check if it is from one of our
					// clients
					for (auto eaddr : options.slaves) {
						if (rte_is_same_ether_addr(
							&eaddr->mac_addr,
							&each->eth_hdr
							     .src_addr)) {
							invalid = false;
							break;
						}
					}
					if (invalid) {
						// received invalid packet from
						// unregistered slave
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "wait_for_slaves: invalid packet %p from unregistered slave\n.",
						    tx_bufs[i]);
						goto end_loop;
					}
					invalid = false;
					// check if we have already received the
					// same packet from the mac addr
					for (auto eaddr : recved) {
						if (rte_is_same_ether_addr(
							eaddr,
							&each->eth_hdr
							     .src_addr)) {
							invalid = true;
							break;
						}
					}
					if (invalid) {
						// received invalid packet from
						// the same slave
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "wait_for_slaves: invalid packet %p - duplicated\n.",
						    tx_bufs[i]);
						goto end_loop;
					}
					/* NOTE(review): this stores a pointer
					 * into the mbuf; safe only because
					 * kept mbufs go to `out` unfreed and
					 * the loop ends with the burst —
					 * confirm when `out` is null. */
					recved.push_back(
					    &each->eth_hdr.src_addr);
					if (recved.size() ==
					    options.slaves.size()) {
						stop = true;
					}
					if (out != nullptr) {
						out[tot] = tx_bufs[i];
						tot++;
						// don't free this packet
						continue;
					}
				} else {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "wait_for_slaves: ignoring invalid packet %p type %d.\n",
					    (void *)tx_bufs[i], type);
				}
			end_loop:
				rte_pktmbuf_free(tx_bufs[i]);
			}
		}
		// struct rte_eth_stats stats;
		// if (rte_eth_stats_get(options.portid, &stats) != 0 ) {
		//	rte_exit(EXIT_FAILURE, "failed!");
		// }
		//printf("wait_slaves <AFTER>: ipackets %lu, opackets %lu, ierrors %lu, oerrors %lu\n", stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors);
		if (now - start > SLAVES_MAX_WAIT_MS * MS2NS) {
			rte_exit(EXIT_FAILURE,
			    "cat: waiting for too long %d. I QUIT!!", etype);
		}
	}
}
/*
 * Main measurement loop (runs on the polling lcore until s_stop is set).
 * Drives the probe state machine:
 *   WAIT -> send PROBE when the inter-arrival deadline arrives -> SENT
 *   SENT -> COMPLETE once hw tx ts is read and PROBE_RESP + STAT arrive,
 *           or -> PKTLOSS after pkt_loss_time_ms without them
 *   COMPLETE/PKTLOSS -> record datapt (if complete) -> WAIT
 * Aborts the process after pkt_loss_failure_threshold consecutive losses.
 */
static void
pkt_loop()
{
	struct rte_mbuf *tx_buf;
	struct rte_mbuf *rx_bufs[BURST_SIZE];
	struct pkt_hdr *pkt_data;
	rdport_generator port_gen(MIN_RANDOM_PORT);
	/* Per-probe completion flags; all true means nothing outstanding. */
	bool read_tx = true;
	bool recv_stat = true;
	bool recv_resp = true;

	if (rte_eth_dev_socket_id(options.portid) > 0 &&
	    rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "locore_main: WARNING, port %d is on remote NUMA node to "
		    "polling thread.\n\tPerformance will "
		    "not be optimal.\n",
		    options.portid);
	}

	uint64_t next_ts = topo_uptime_ns();
	uint64_t last_send_ts = next_ts;
	bool is_last_pkt_lost = false;
	uint32_t num_cts_pkt_lost = 0;

	while (!options.s_stop.load()) {
		uint64_t now = topo_uptime_ns();
		// always pop incoming packets
		const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
		    options.s_rxqid, rx_bufs, BURST_SIZE);
		if (nb_rx > 0) {
			for (int i = 0; i < nb_rx; i++) {
				if (options.s_state != STATE_SENT) {
					// only need to process packets after we
					// sent one
					rte_pktmbuf_free(rx_bufs[i]);
					continue;
				}
				struct pkt_hdr *each = check_valid_packet(
				    rx_bufs[i], &options.s_host_spec.mac_addr);
				if (each == nullptr) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "locore_main: ignoring invalid packet %p.\n",
					    (void *)rx_bufs[i]);
					rte_pktmbuf_free(rx_bufs[i]);
					continue;
				}
				uint16_t type = rte_be_to_cpu_16(each->type);
				NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, each,
				    "locore_main: received packet %p ", each);
				struct pkt_payload_epoch *pld_epoch;
				struct pkt_payload_stat *pld_stat;
				uint32_t epoch;
				switch (type) {
				case PKT_TYPE_PROBE_RESP:
					/* Server echoed our probe. */
					pld_epoch = (struct pkt_payload_epoch *)
							each->payload;
					epoch = rte_be_to_cpu_32(
					    pld_epoch->epoch);
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "lcore_main: PROBE_RESP received packet %p epoch %d\n", each, epoch);
					if (options.s_last_datapt == nullptr ||
					    epoch !=
						options.s_last_datapt->epoch) {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "locore_main: packet %p epoch %d doesn't match datapt %d.\n",
						    (void *)rx_bufs[i], epoch,
						    options.s_last_datapt
							->epoch);
						break;
					}
					recv_resp = true;
					break;
				case PKT_TYPE_STAT:
					/* Server-side timestamps for the probe. */
					pld_stat = (struct pkt_payload_stat *)
						       each->payload;
					epoch = rte_be_to_cpu_32(
					    pld_stat->epoch);
					if (options.s_last_datapt == nullptr ||
					    epoch !=
						options.s_last_datapt->epoch) {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "locore_main: packet %p epoch %d doesn't match datapt %d.\n",
						    (void *)rx_bufs[i], epoch,
						    options.s_last_datapt
							->epoch);
						break;
					}
					options.s_last_datapt->srv_hw_tx =
					    rte_be_to_cpu_64(pld_stat->hw_tx);
					options.s_last_datapt->srv_hw_rx =
					    rte_be_to_cpu_64(pld_stat->hw_rx);
					options.s_last_datapt->srv_sw_tx =
					    rte_be_to_cpu_64(pld_stat->sw_tx);
					options.s_last_datapt->srv_sw_rx =
					    rte_be_to_cpu_64(pld_stat->sw_rx);
					recv_stat = true;
					is_last_pkt_lost = false;
					break;
				default:
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "locore_main: ignoring packet %p with unknown type %d.\n",
					    (void *)rx_bufs[i], type);
				}
				rte_pktmbuf_free(rx_bufs[i]);
			}
		}
		if (options.s_state == STATE_SENT) {
			// check if hw tx ts is read
			if (!read_tx) {
				int ret;
				struct timespec ts;
				if (options.s_hwtimestamp) {
					/* NIC tx timestamp may lag the send;
					 * poll until available. */
					if ((ret = rte_eth_timesync_read_tx_timestamp(
						 options.portid, &ts)) == 0) {
						ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
						    "locore_main: read hw tx timestamp %lu.\n",
						    (ts.tv_nsec + ts.tv_sec * S2NS));
						options.s_last_datapt->clt_hw_tx =
						    ts.tv_nsec + ts.tv_sec * S2NS;
						read_tx = true;
					}
				} else {
					options.s_last_datapt->clt_hw_tx = 0;
					read_tx = true;
				}
			}
			if (read_tx && recv_resp && recv_stat) {
				options.s_state = STATE_COMPLETE;
			} else {
				// check packet loss
				if (now - last_send_ts >
				    options.pkt_loss_time_ms * MS2NS) {
					if (is_last_pkt_lost) {
						num_cts_pkt_lost++;
					} else {
						is_last_pkt_lost = true;
						num_cts_pkt_lost = 1;
					}
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "locore_main: packet loss: waiting too long for epoch %d. %d in a row.\n",
					    options.s_last_datapt->epoch,
					    num_cts_pkt_lost);
					delete options.s_last_datapt;
					options.s_last_datapt = nullptr;
					options.s_state = STATE_PKTLOSS;
					options.s_pkt_loss.fetch_add(1);
					if (num_cts_pkt_lost >
					    options
						.pkt_loss_failure_threshold) {
						rte_exit(EXIT_FAILURE,
						    "too many continuous packet loss detected\n");
					}
				}
			}
		}
		if (options.s_state == STATE_COMPLETE ||
		    options.s_state == STATE_PKTLOSS ||
		    options.s_state == STATE_WAIT) {
			if (options.s_state == STATE_COMPLETE) {
				/* Archive the finished measurement. */
				options.s_data.push_back(options.s_last_datapt);
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "locore_main: datapt for epoch %d dump:\n"
				    " Valid: %d\n"
				    " client TX HW: %lu\n"
				    " client TX SW: %lu\n"
				    " client RX HW: %lu\n"
				    " client RX SW: %lu\n"
				    " server TX HW: %lu\n"
				    " server TX SW: %lu\n"
				    " server RX HW: %lu\n"
				    " server RX SW: %lu\n\n",
				    options.s_last_datapt->epoch,
				    options.s_last_datapt->valid,
				    options.s_last_datapt->clt_hw_tx,
				    options.s_last_datapt->clt_sw_tx,
				    options.s_last_datapt->clt_hw_rx,
				    options.s_last_datapt->clt_sw_rx,
				    options.s_last_datapt->srv_hw_tx,
				    options.s_last_datapt->srv_sw_tx,
				    options.s_last_datapt->srv_hw_rx,
				    options.s_last_datapt->srv_sw_rx);
				options.s_recved_pkts.fetch_add(1);
				options.s_last_datapt = nullptr;
			}
			options.s_state = STATE_WAIT;
			if (now >= next_ts) {
				struct pkt_payload_epoch *pld_epoch;
				uint32_t epoch;
				next_ts += (int)(options.s_iagen->generate() *
				    S2NS);
				/* Randomize source port per probe. */
				options.s_host_conn.src_port = port_gen.next();
				if (alloc_pkt_hdr(mempool_get(options.s_socketid),
					PKT_TYPE_PROBE, &options.s_host_conn, 0,
					&tx_buf, &pkt_data) != 0) {
					rte_exit(EXIT_FAILURE,
					    "failed to alloc probe packet.\n");
				}
				epoch = options.s_epoch;
				options.s_epoch++;
				pld_epoch = (struct pkt_payload_epoch *)
						pkt_data->payload;
				pld_epoch->epoch = rte_cpu_to_be_32(epoch);
				options.s_last_datapt = new struct datapt;
				options.s_last_datapt->epoch = epoch;
				options.s_last_datapt->valid =
				    options.s_record.load();
				last_send_ts = now;
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "locore_main: sending packet 0x%p with epoch %d\n",
				    (void *)tx_buf, epoch);
				const uint16_t nb_tx =
				    rte_eth_tx_burst(options.portid,
					options.s_txqid, &tx_buf, 1);
				if (nb_tx != 1) {
					rte_exit(EXIT_FAILURE,
					    "failed to send packet 0x%p, epoch %d\n",
					    (void *)tx_buf, epoch);
				}
				/* NOTE(review): freeing an mbuf after a
				 * successful rte_eth_tx_burst looks like a
				 * double free (the driver owns sent mbufs) —
				 * works only if the refcount was bumped at
				 * alloc; verify alloc_pkt_hdr. */
				rte_pktmbuf_free(tx_buf);
				read_tx = false;
				recv_resp = false;
				recv_stat = false;
				options.s_state = STATE_SENT;
			}
		}
	}
}
/*
 * Main client lcore entry (launched via rte_eal_remote_launch()).
 * In master mode, fences all slave (rat) nodes with SYNC/SYNC_ACK before
 * measuring and FIN/FIN_ACK after, then aggregates each slave's reported
 * qps / received / lost counters into the global atomics.
 * Fix: unsigned values (core_id, qps, loop index) were printed with %d;
 * use %u to match the argument types.
 * Always returns 0.
 */
static int
locore_main(void *tif __rte_unused)
{
	struct rte_mbuf *mbufs[MAX_SLAVES];
	uint32_t core_id = rte_lcore_id();

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %u running...\n",
	    core_id);

	if (options.master_mode == 1) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
		    "locore_main: sending SYNC ...\n");
		send_all_slaves(PKT_TYPE_SYNC);
		ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
		    "locore_main: waiting for SYNC_ACK ...\n");
		wait_for_slaves(PKT_TYPE_SYNC_ACK, nullptr);
	}

	// measurement window: everything inside is timed
	options.s_start_time.store(topo_uptime_ns());
	pkt_loop();
	options.s_end_time.store(topo_uptime_ns());

	if (options.master_mode == 1) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
		    "locore_main: sending FIN ...\n");
		send_all_slaves(PKT_TYPE_FIN);
		ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
		    "locore_main: waiting for FIN_ACK ...\n");
		wait_for_slaves(PKT_TYPE_FIN_ACK, mbufs);

		// aggregate slave QPS
		for (unsigned int i = 0; i < options.slaves.size(); i++) {
			// these packets already underwent validity check in
			// wait_for_slaves
			auto pkt_hdr = rte_pktmbuf_mtod(mbufs[i],
			    struct pkt_hdr *);
			auto pld_qps = (struct pkt_payload_qps *)
					   pkt_hdr->payload;
			uint32_t qps = rte_be_to_cpu_32(pld_qps->qps);
			uint32_t recved = rte_be_to_cpu_32(
			    pld_qps->recved_pkts);
			uint32_t loss = rte_be_to_cpu_32(pld_qps->lost_pkts);
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "locore_main: received qps %u from client %u\n",
			    qps, i);
			options.s_slave_qps.fetch_add(qps);
			options.s_slave_loss.fetch_add(loss);
			options.s_slave_recved.fetch_add(recved);
			rte_pktmbuf_free(mbufs[i]);
		}
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: exited\n");
	return 0;
}
static void
dump_options()
{
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
"Configuration:\n"
" verbosity = +%d\n"
" run time = %d\n"
" warmup time = %d\n"
" output file = %s\n"
" number of threads = %d\n"
" interarrival dist = %s\n"
" target qps = %d\n"
" host IP = 0x%x\n"
" pkt loss time = %u\n"
" pkt loss failure threshold = %u\n"
" portid = %d\n",
ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time,
options.warmup_time, options.output, CPU_COUNT(&options.cpu_set),
options.ia_gen_str, options.target_qps, options.s_host_spec.ip,
options.pkt_loss_time_ms, options.pkt_loss_failure_threshold,
options.portid);
for (auto slave : options.slaves) {
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
" slave = 0x%x@%x:%x:%x:%x:%x:%x\n", slave->ip,
slave->mac_addr.addr_bytes[0],
slave->mac_addr.addr_bytes[1],
slave->mac_addr.addr_bytes[2],
slave->mac_addr.addr_bytes[3],
slave->mac_addr.addr_bytes[4],
slave->mac_addr.addr_bytes[5]);
}
}
/*
 * Print command-line usage to stdout.
 * Fix: main() accepts -p (port id) in its getopt string but the help
 * text did not document it; keep this list in sync with
 * "vs:S:t:T:ho:A:i:q:H:L:l:p:".
 */
static void
usage()
{
	fprintf(stdout,
	    "Usage:\n"
	    "    -v(vv): verbose mode\n"
	    "    -s: server net spec\n"
	    "    -S: slave(rat)'s net spec (also turns on master mode)\n"
	    "    -t: run time\n"
	    "    -T: warmup time\n"
	    "    -h: display the information\n"
	    "    -o: output filename\n"
	    "    -A: affinity mask\n"
	    "    -i: inter-arrival time distribution\n"
	    "    -q: target qps\n"
	    "    -H: host net spec\n"
	    "    -L: pkt loss failure threshold\n"
	    "    -l: pkt loss time threshold\n"
	    "    -p: port id\n");
}
/*
 * Entry point for the cat latency-measurement client.
 * Parses EAL and program options, initializes libtopo/libnms, configures
 * the NIC port and mempools, launches the measurement loop on exactly one
 * remote lcore, acts as a coarse warmup/run timer, then computes QPS and
 * dumps per-epoch timestamp records to the output file.
 * Fix: removed the CPU_COUNT(&options.cpu_set) > 1 fallback block — it
 * was unreachable, since we rte_exit() earlier unless exactly one core
 * is set.
 */
int
main(int argc, char *argv[])
{
	std::ofstream log_file;
	bool has_host_spec = false;

	ntr_init();

	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}
	argc -= ret;
	argv += ret;

	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		struct net_spec *ns;
		while ((c = getopt(argc, argv, "vs:S:t:T:ho:A:i:q:H:L:l:p:")) !=
		    -1) {
			switch (c) {
			case 'v':
				// each -v raises the log level by one
				ntr_set_level(NTR_DEP_USER1,
				    ntr_get_level(NTR_DEP_USER1) + 1);
				break;
			case 's':
				if (str_to_netspec(optarg,
					&options.server_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid server net spec.\n");
				}
				break;
			case 'S':
				// register a slave and switch to master mode
				ns = new struct net_spec;
				if (str_to_netspec(optarg, ns) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid client net spec\n");
				}
				options.slaves.push_back(ns);
				options.master_mode = 1;
				if (options.slaves.size() > MAX_SLAVES) {
					rte_exit(EXIT_FAILURE,
					    "too many rats.\n");
				}
				break;
			case 't':
				options.run_time = strtol(optarg, nullptr, 10);
				break;
			case 'T':
				options.warmup_time = strtol(optarg, nullptr,
				    10);
				break;
			case 'h':
				usage();
				rte_exit(EXIT_SUCCESS, "\n");
			case 'o':
				strncpy(options.output, optarg,
				    sizeof(options.output) - 1);
				break;
			case 'A':
				cpulist_to_cpuset(optarg, &options.cpu_set);
				break;
			case 'i':
				strncpy(options.ia_gen_str, optarg,
				    sizeof(options.ia_gen_str) - 1);
				break;
			case 'q':
				options.target_qps = strtoul(optarg, nullptr,
				    10);
				break;
			case 'H':
				has_host_spec = true;
				if (str_to_netspec(optarg,
					&options.s_host_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid host net spec.\n");
				}
				break;
			case 'L':
				options.pkt_loss_failure_threshold =
				    strtoul(optarg, nullptr, 10);
				break;
			case 'l':
				options.pkt_loss_time_ms = strtoul(optarg,
				    nullptr, 10);
				// 0 means "never time out"
				if (options.pkt_loss_time_ms == 0) {
					options.pkt_loss_time_ms = UINT32_MAX;
				}
				break;
			case 'p':
				options.portid = strtol(optarg, nullptr, 10);
				break;
			default:
				usage();
				rte_exit(EXIT_FAILURE, "unknown argument: %c\n",
				    c);
			}
		}
	}

	if (!has_host_spec) {
		rte_exit(EXIT_FAILURE, "must specify host IP\n");
	}

	// init libtopo
	if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
	    0) {
		rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
	}

	// init nms
	if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) {
		rte_exit(EXIT_FAILURE, "failed to init libnms!\n");
	}

	// cat is single-threaded: require exactly one core
	if (CPU_COUNT(&options.cpu_set) != 1) {
		rte_exit(EXIT_FAILURE, "must specify exactly one core\n");
	}
	int core_id = CPU_FFS(&options.cpu_set) - 1;

	dump_options();

	// configure memory and port
	struct port_conf pconf;
	struct device_conf dconf;
	struct mem_conf mconf;
	portconf_get(options.portid, &pconf);
	if (!pconf.timesync) {
		options.s_hwtimestamp = false;
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "main: timesync disabled. hw timestamp unavailable.\n ");
	}
	dconf.mtu = MAX_STANDARD_MTU;
	CPU_COPY(&options.cpu_set, &dconf.core_affinity);
	dconf.portid = options.portid;
	dconf.rss_hf = pconf.rss_hf;
	dconf.rx_offloads = pconf.rxoffload;
	dconf.tx_offloads = pconf.txoffload;
	dconf.timesync = pconf.timesync;
	dconf.rx_fn = rx_add_timestamp;
	dconf.rx_user = nullptr;
	dconf.rx_ring_sz = 2048;
	dconf.tx_fn = tx_add_timestamp;
	dconf.tx_user = nullptr;
	dconf.tx_ring_sz = 2048;

	mconf.cache_size = 64;
	mconf.priv_size = 0;
	mconf.num_elements = 4096;
	mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_STANDARD_MTU;
	mconf.max_pools = -1;
	dpdk_init(&dconf, &mconf);

	if (rte_eth_macaddr_get(options.portid,
	    &options.s_host_spec.mac_addr) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
		    options.portid);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n",
	    options.portid, options.s_host_spec.mac_addr.addr_bytes[0],
	    options.s_host_spec.mac_addr.addr_bytes[1],
	    options.s_host_spec.mac_addr.addr_bytes[2],
	    options.s_host_spec.mac_addr.addr_bytes[3],
	    options.s_host_spec.mac_addr.addr_bytes[4],
	    options.s_host_spec.mac_addr.addr_bytes[5]);

	// create default generator
	options.s_iagen = createGenerator(options.ia_gen_str);
	if (options.s_iagen == nullptr) {
		rte_exit(EXIT_FAILURE, "invalid generator string %s\n",
		    options.ia_gen_str);
	}
	options.s_iagen->set_lambda((double)options.target_qps);

	// open log file for writing
	log_file.open(options.output, std::ofstream::out);
	if (!log_file) {
		rte_exit(EXIT_FAILURE, "failed to open log file %s\n",
		    options.output);
	}

	sleep(INIT_DELAY);

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "main: launching thread on core %d\n", core_id);
	if (rte_eal_remote_launch(locore_main, nullptr, core_id) != 0) {
		rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
	}

	// XXX: poor man's timer — tick once a second; enable recording after
	// warmup, stop after warmup + run time
	uint32_t second = 0;
	while (true) {
		if (second >= options.warmup_time) {
			options.s_record.store(1);
		}
		if (second >= options.run_time + options.warmup_time) {
			options.s_stop.store(true);
			break;
		}
		usleep(S2US);
		second++;
	}

	if (rte_eal_wait_lcore(core_id) < 0)
		rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");

	// calculate QPS over the measured wall-clock window
	uint32_t qps = (double)options.s_recved_pkts.load() /
	    (((double)(options.s_end_time.load() -
		  options.s_start_time.load()) /
		(double)S2NS));
	qps += options.s_slave_qps.load();

	// dump stats: summary line, then one CSV row per valid datapoint
	log_file << qps << ',' << options.s_recved_pkts.load() << ','
		 << options.s_pkt_loss.load() << ','
		 << options.s_slave_recved.load() << ','
		 << options.s_slave_loss.load() << std::endl;
	for (auto it : options.s_data) {
		if (it->valid) {
			log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
				 << it->clt_hw_rx << ',' << it->clt_hw_tx << ','
				 << it->srv_sw_rx << ',' << it->srv_sw_tx << ','
				 << it->srv_hw_rx << ',' << it->srv_hw_tx
				 << std::endl;
		}
		delete it;
	}
	log_file.close();

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "qps = %d, recved = %d, loss = %d, slave recved = %d, slave loss = %d\n",
	    qps, options.s_recved_pkts.load(), options.s_pkt_loss.load(),
	    options.s_slave_recved.load(), options.s_slave_loss.load());

	// clean up
	dpdk_cleanup(&dconf);
	return 0;
}

View File

@ -1,701 +0,0 @@
#include <atomic>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <vector>
#include <unistd.h>
#include <sys/cpuset.h>
#include <sys/endian.h>
#include <sys/sched.h>
#include <sys/types.h>
#include <topo.h>
#include <rte_common.h>
#include <rte_config.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include "ntr.h"
//#include "gen.hh"
#include "net/netsup.hh"
#include "net/pkt.hh"
#include "nms.h"
#include "rte_byteorder.h"
constexpr static unsigned int BURST_SIZE = 32;	// max packets fetched per rte_eth_rx_burst
constexpr static unsigned int CACHELINE_SIZE = 64;	// bytes; granularity of the memory load generator
constexpr static uint16_t THREAD_LOAD_BUFFER_SZ = 16384;	// per-thread scratch buffer for LOAD payloads
/*
 * State of the single in-flight probe transaction: the prober's net
 * spec, the connection spec used for the reply, and the timestamps
 * collected so far for the current epoch.
 */
struct probe_state_t {
	struct net_spec dst;	// probe sender; reply destination (cspec.dst points here)
	struct conn_spec cspec {
		.dst = &dst
	};
	uint64_t last_sw_rx;	// software rx timestamp (ns)
	uint64_t last_sw_tx;	// software tx timestamp of the PROBE_RESP (ns)
	uint64_t last_hw_rx;	// NIC rx timestamp (ns); only set when hw timestamping is on
	uint32_t epoch;		// epoch echoed from the PROBE payload
};
// Probe bookkeeping. When a probe packet first arrives, the state is
// marked in-flux and the mbuf's dynamic field is set, which prevents
// other probe packets from being processed. The flag is released once
// the server sends the probe stats back, so the server is guaranteed to
// process only one probe packet at a time.
// XXX: this could instead be attached to the mbuf itself and handled by
// the lcore thread.
// Kept global because only one probe request can be pending at any time,
// and rx_add_timestamp needs somewhere to stash its timestamps too.
/*
 * Per-worker-thread bookkeeping: ids, queue assignment, NUMA placement,
 * and the NUMA-local buffers used by the load generators.
 */
struct thread_info {
	int tid;	// logical thread index (queues are assigned tid == qid)
	int rxqid;	// rx queue this thread polls
	int txqid;	// tx queue this thread sends on
	int lcore_id;	// DPDK lcore the thread runs on
	int node_id;	// NUMA node of lcore_id
	void *cache_lines;	// cacheline array read by worker_memory_load()
	void *load_buffer;	// scratch sink for LOAD payloads / memory reads
};
/*
 * Global khat server configuration (set at startup) and runtime state.
 */
struct options_t {
	// config
	int num_threads { 1 };	// worker count, derived from -A cpu list
	cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core
	bool jumbo_frame_enabled {
		false
	}; // setting this to true changes mbuf size and mtu
	int port_mtu { MAX_STANDARD_MTU };
	int thread_cacheline_cnt = { 1600 }; // 1600 * 64B = 100KB data per thread
	uint16_t portid { 0 };

	// states
	struct net_spec s_host_spec { };	// this host's ip/mac
	std::vector<struct thread_info *> s_thr_info;	// one entry per worker
	int probe_state_offset { 0 };	// mbuf dynfield offset of the "probe valid" flag
	bool s_hwtimestamp { true };	// false when the port lacks timesync support
	struct probe_state_t s_probe_info;	// the single in-flight probe
	std::atomic<bool> is_probing { false };	// claims/releases s_probe_info
};

struct options_t options;
/* Read the per-mbuf "probe valid" dynamic field. */
static bool
mbuf_is_probe_valid(struct rte_mbuf *pkt)
{
	bool *flag = RTE_MBUF_DYNFIELD(pkt, options.probe_state_offset, bool *);
	return *flag;
}
/* Write the per-mbuf "probe valid" dynamic field. */
static void
mbuf_set_probe_valid(struct rte_mbuf *pkt, bool b)
{
	bool *flag = RTE_MBUF_DYNFIELD(pkt, options.probe_state_offset, bool *);
	*flag = b;
}
/*
 * RX callback attached to every rx queue: timestamps incoming PROBE
 * packets. On a PROBE, atomically claims the global probe slot
 * (options.is_probing), records the software rx time and — when hw
 * timestamping is enabled — the NIC rx timestamp, then marks the mbuf's
 * "probe valid" dynamic field so locore_main replies to it. Invalid and
 * non-PROBE packets pass through untouched. Never drops packets; always
 * returns nb_pkts.
 */
static uint16_t
rx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused,
    void *_ __rte_unused)
{
	int rc = 0;
	uint64_t now = topo_uptime_ns();
	struct timespec ts { };
	struct pkt_hdr *pkt_data;
	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i],
		    &options.s_host_spec.mac_addr);
		if (pkt_data == nullptr) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "rx_add_timestamp: ignoring invalid packet %p.\n",
			    (void *)pkts[i]);
			continue;
		}
		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
			bool cmp = false;
			// default to invalid until timestamps are captured
			mbuf_set_probe_valid(pkts[i], false);
			// claim the single probe slot (CAS false -> true)
			if (options.is_probing.compare_exchange_strong(cmp,
				true)) {
				options.s_probe_info.last_sw_rx = now;
				if (options.s_hwtimestamp) {
					if ((rc = rte_eth_timesync_read_rx_timestamp(
						 port, &ts,
						 pkts[i]->timesync & 0x3)) ==
					    0) {
						options.s_probe_info
						    .last_hw_rx = ts.tv_nsec +
						    ts.tv_sec * S2NS;
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_DEBUG,
						    "rx_add_timestamp: tagged packet %p with sw rx: %lu hw rx:%lu.\n",
						    (void *)pkts[i],
						    options.s_probe_info
							.last_sw_rx,
						    options.s_probe_info
							.last_hw_rx);
						mbuf_set_probe_valid(pkts[i],
						    true);
					} else {
						// hw timestamp unavailable:
						// release the slot again
						options.is_probing.store(false);
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "rx_add_timestamp: packet %p not tagged - failed to read hw rx timestamp: %d.\n",
						    (void *)pkts[i], rc);
					}
				} else {
					// sw-only timestamping path
					mbuf_set_probe_valid(pkts[i], true);
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "rx_add_timestamp: tagged packet %p with sw rx only: %lu.\n",
					    (void *)pkts[i], now);
				}
			} else {
				// another probe is already in flight
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "rx_add_timestamp: packet %p not tagged - server is probing.\n",
				    (void *)pkts[i]);
			}
		} else {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "rx_add_timestamp: packet %p not tagged - not PROBE packet: type %d.\n",
			    (void *)pkts[i], rte_be_to_cpu_16(pkt_data->type));
		}
	}
	return nb_pkts;
}
/*
 * TX callback attached to every tx queue: records the software tx
 * timestamp of outgoing PROBE_RESP packets into the global probe state.
 * Other (and invalid) packets pass through untouched; always returns
 * nb_pkts.
 * Fix: the "not tagged" debug log printed the raw big-endian wire value
 * of pkt_data->type; convert with rte_be_to_cpu_16 like every other use.
 */
static uint16_t
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
{
	uint64_t now = topo_uptime_ns();
	struct pkt_hdr *pkt_data;
	for (int i = 0; i < nb_pkts; i++) {
		pkt_data = check_valid_packet(pkts[i],
		    &options.s_host_spec.mac_addr);
		if (pkt_data == nullptr) {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: ignoring invalid packet %p.\n",
			    (void *)pkts[i]);
			continue;
		}
		if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
			// this packet is the response to PROBE packets
			// at this time the packet is not sent to the NIC yet so
			// the state must be waiting stats
			assert(options.is_probing.load() &&
			    mbuf_is_probe_valid(pkts[i]));
			options.s_probe_info.last_sw_tx = now;
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: tagged packet %p with sw tx %lu\n",
			    (void *)pkts[i], options.s_probe_info.last_sw_tx);
		} else {
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "tx_add_timestamp: packet %p not tagged - type %d\n",
			    (void *)pkts[i],
			    rte_be_to_cpu_16(pkt_data->type));
		}
	}
	return nb_pkts;
}
/*
 * CPU load generator: busy-spin for `us` microseconds, measured with
 * topo_uptime_ns().
 */
static void
worker_cpu_load(unsigned long us)
{
	const uint64_t budget_ns = us * 1000;
	const uint64_t start = topo_uptime_ns();

	while (topo_uptime_ns() - start < budget_ns) {
		/* spin */
	}
}
/*
 * Memory load generator: performs `load` cacheline-granular reads from
 * another thread's cache_lines array into the calling thread's
 * load_buffer (so the reads are not optimized away).
 * tid:   index of the calling thread (owner of the destination buffer)
 * which: selects the starting cacheline within the global space of
 *        thread_cacheline_cnt * nthreads lines; this also picks the
 *        single target thread whose region is read
 * load:  number of reads to perform; the index wraps within the target
 *        thread's region
 */
static void
worker_memory_load(int tid, uint32_t which, uint32_t load)
{
	uint32_t start_cacheline = which % (options.thread_cacheline_cnt * options.s_thr_info.size());
	uint32_t thrd = start_cacheline / options.thread_cacheline_cnt;
	uint32_t start = start_cacheline % options.thread_cacheline_cnt;
	struct thread_info * cur = options.s_thr_info.at(tid);
	struct thread_info * tgt = options.s_thr_info.at(thrd);
	for (uint32_t i = 0; i < load; i++) {
		// read the first word of each target cacheline into our buffer
		*(uint32_t *)cur->load_buffer = *(uint32_t *)((char *)tgt->cache_lines + ((start + i) % options.thread_cacheline_cnt) * CACHELINE_SIZE);
	}
}
/*
 * Polling loop for one khat server worker thread.
 * Each iteration: burst-receive from the thread's rx queue; for a valid
 * PROBE (tagged by rx_add_timestamp) queue a PROBE_RESP echoing the
 * epoch; for a LOAD run the requested cpu/memory load and queue a
 * LOAD_RESP; burst-send everything; then, if a probe is pending, wait
 * for the PROBE_RESP's hw tx timestamp and send a STAT packet carrying
 * all four timestamps before releasing the probe slot.
 * Runs forever (never returns).
 */
static int
locore_main(void *ti)
{
	auto tinfo = (struct thread_info *)ti;
	struct rte_mbuf *bufs[BURST_SIZE];
	// + 1 because it might involve an extra PKT_TYPE_STAT packet
	// when all tx timestamps are ready
	struct rte_mbuf *tx_bufs[BURST_SIZE];
	struct pkt_hdr *pkt_data;
	// XXX: hack hardcode to be larger than MTU
	bool pending_probe = false;

	// warn when the NIC lives on a remote NUMA node
	if (rte_eth_dev_socket_id(options.portid) > 0 &&
	    rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
		    "polling thread.\n\tPerformance will "
		    "not be optimal.\n",
		    tinfo->tid, options.portid);
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "locore_main <thread %d>: running on locore %d with txqid %d and rxqid %d.\n",
	    tinfo->tid, rte_lcore_id(), tinfo->txqid, tinfo->rxqid);

	while (true) {
		uint16_t nb_tx = 0;
		const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
		    tinfo->rxqid, bufs, BURST_SIZE);
		struct rte_mbuf *pkt_buf;
		struct pkt_hdr *tx_data;
		for (int i = 0; i < nb_rx; i++) {
			// XXX: optimization: in rx_add_timestamp every packet
			// is already validated once can just mark valid packet
			// with a value so we can avoid this redundant check
			pkt_data = check_valid_packet(bufs[i],
			    &options.s_host_spec.mac_addr);
			if (pkt_data == nullptr) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "locore_main <thread %d>: skipping invalid packet %p.\n",
				    tinfo->tid, (void *)bufs[i]);
				// dump_pkt(bufs[i]);
				rte_pktmbuf_free(bufs[i]);
				continue;
			}
			NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, pkt_data,
			    "locore_main <thread %d>: received packet ", tinfo->tid);
			switch (rte_be_to_cpu_16(pkt_data->type)) {
			case PKT_TYPE_PROBE: {
				// only probes that rx_add_timestamp tagged
				// (slot claimed + timestamped) get a reply
				if (mbuf_is_probe_valid(bufs[i])) {
					// send back probe_resp pkt to probe for
					// return latency
					pending_probe = true;
					// book keep probe results
					options.s_probe_info.epoch =
					    rte_be_to_cpu_32(
						((struct pkt_payload_epoch *)
						    pkt_data->payload)
						    ->epoch);
					pkt_hdr_to_netspec(pkt_data,
					    &options.s_probe_info.dst,
					    &options.s_probe_info.cspec
						.dst_port,
					    nullptr,
					    &options.s_probe_info.cspec
						.src_port);
					options.s_probe_info.cspec.src =
					    &options.s_host_spec;
					if (alloc_pkt_hdr(mempool_get(
							      tinfo->node_id),
						PKT_TYPE_PROBE_RESP,
						&options.s_probe_info.cspec, 0,
						&pkt_buf, &tx_data) != 0) {
						rte_exit(EXIT_FAILURE,
						    "failed to allocate pkt\n");
					}
					rte_memcpy(tx_data->payload,
					    pkt_data->payload,
					    sizeof(struct pkt_payload_epoch));
					mbuf_set_probe_valid(pkt_buf, true);
					// queue for burst send
					NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, tx_data,
					    "locore_main <thread %d>: sending packet ", tinfo->tid);
					tx_bufs[nb_tx++] = pkt_buf;
				}
				break;
			}
			case PKT_TYPE_LOAD: {
				struct conn_spec cspec;
				struct net_spec src;
				struct net_spec dst;
				// touch the unused data to pretend that we read
				// those dummy fields
				memcpy(tinfo->load_buffer, pkt_data->payload,
				    MIN(bufs[i]->data_len -
					    sizeof(struct pkt_hdr),
					THREAD_LOAD_BUFFER_SZ));
				// perform the load
				auto pld = (struct pkt_payload_load *)
						pkt_data->payload;
				uint32_t load_type = rte_be_to_cpu_32(pld->type);
				uint32_t load_arg0 = rte_be_to_cpu_32(pld->arg0);
				uint32_t load_arg1 = rte_be_to_cpu_32(pld->arg1);
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "locore_main <thread %d>: LOAD type %d, arg0 %d, arg1 %d\n",
				    tinfo->tid, load_type, load_arg0, load_arg1);
				if (load_type == LOAD_TYPE_CPU) {
					worker_cpu_load(load_arg0);
				} else if (load_type == LOAD_TYPE_MEM) {
					worker_memory_load(tinfo->tid, load_arg0, load_arg1);
				} else {
					// unknown load: no reply is sent
					ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
					    "locore_main <thread %d>: unknown LOAD type %d, ignoring...", tinfo->tid, load_type);
					break;
				}
				// reply: swap src/dst from the request
				pkt_hdr_to_netspec(pkt_data, &src,
				    &cspec.dst_port, &dst, &cspec.src_port);
				cspec.dst = &src;
				cspec.src = &dst;
				// printf("LOAD PKT SIZE: %d\n",
				// bufs[i]->data_len); we reply to load packet
				// regardless of the server state
				if (alloc_pkt_hdr(mempool_get(tinfo->node_id),
					PKT_TYPE_LOAD_RESP, &cspec, 0, &pkt_buf,
					&tx_data) != 0) {
					rte_exit(EXIT_FAILURE,
					    "failed to allocate pkt\n");
				}
				rte_memcpy(tx_data->payload, pkt_data->payload,
				    sizeof(struct pkt_payload_load));
				// queue for burst send
				NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, tx_data,
				    "locore_main <thread %d>: sending packet ", tinfo->tid);
				tx_bufs[nb_tx++] = pkt_buf;
				break;
			}
			default:
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "locore_main <thread %d>: ignoring packet %p with unknown type %d.\n",
				    tinfo->tid, (void *)bufs[i],
				    rte_be_to_cpu_16(pkt_data->type));
				break;
			}
			rte_pktmbuf_free(bufs[i]);
		}
		// send all packets
		tx_burst_all(options.portid, tinfo->txqid, tx_bufs, nb_tx);
		// we wanna check every loop not only when there are packets
		if (pending_probe) {
			assert(options.is_probing.load());
			struct timespec ts { };
			struct pkt_payload_stat *stat;
			int status = 0;
			// poll for the PROBE_RESP's hw tx timestamp; retry on
			// later iterations until it is available
			if (options.s_hwtimestamp) {
				if ((status = rte_eth_timesync_read_tx_timestamp(
					 options.portid, &ts)) == 0) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "locore_main <thread %d>: obtained hw tx timestamp %lu.\n",
					    tinfo->tid,
					    (ts.tv_sec * S2NS + ts.tv_nsec));
				} else {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "locore_main <thread %d>: failed to obtain hw tx timestamp: %d.\n",
					    tinfo->tid, status);
				}
			}
			if (status == 0) {
				// now we have everything we need
				if (alloc_pkt_hdr(mempool_get(tinfo->node_id),
					PKT_TYPE_STAT, &options.s_probe_info.cspec, 0,
					&pkt_buf, &tx_data) != 0) {
					rte_exit(EXIT_FAILURE,
					    "failed to alloc pkt_buf\n");
				}
				// populate stats
				stat = (struct pkt_payload_stat *)tx_data->payload;
				stat->epoch = rte_cpu_to_be_32(
				    options.s_probe_info.epoch);
				if (options.s_hwtimestamp) {
					stat->hw_rx = rte_cpu_to_be_64(
					    options.s_probe_info.last_hw_rx);
					stat->hw_tx = rte_cpu_to_be_64(
					    ts.tv_nsec + ts.tv_sec * S2NS);
				} else {
					stat->hw_rx = 0;
					stat->hw_tx = 0;
				}
				stat->sw_rx = rte_cpu_to_be_64(
				    options.s_probe_info.last_sw_rx);
				stat->sw_tx = rte_cpu_to_be_64(
				    options.s_probe_info.last_sw_tx);
				// send the packet
				tx_burst_all(options.portid, tinfo->txqid, &pkt_buf, 1);
				// release flux
				pending_probe = false;
				options.is_probing.store(false);
			}
		}
	}
}
/*
 * Print command-line usage to stdout.
 * Fix: the help text still listed the removed MLG flags (-m, -b, -x,
 * -X, -S), which main()'s getopt string ("hvA:H:Jp:") rejects as
 * unknown arguments; only document the supported options.
 */
static void
usage()
{
	fprintf(stdout,
	    "Usage:\n"
	    "    -v(vv): verbose mode\n"
	    "    -h: seek help\n"
	    "    -A: cpu list for worker threads\n"
	    "    -H: host spec\n"
	    "    -J: enable jumbo frames\n"
	    "    -p: port id\n");
	fflush(stdout);
}
/*
 * Log the effective khat server configuration at INFO level.
 */
static void
dump_options()
{
	int verbosity = ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING;

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "main: khat configuration:\n"
	    "  verbosity: +%d\n"
	    "  thread count: %d\n"
	    "  ip: 0x%x\n"
	    "  jumbo frame: %d\n"
	    "  port id: %d\n",
	    verbosity,
	    options.num_threads, options.s_host_spec.ip,
	    options.jumbo_frame_enabled, options.portid);
}
/*
 * Entry point for the khat server.
 * Parses EAL and program options, initializes libtopo/libnms, registers
 * the per-mbuf "probe valid" dynamic field, configures the port and the
 * per-NUMA-node mempools, allocates NUMA-local per-thread buffers, and
 * launches one polling worker per configured core. Runs until killed.
 * Fix: an unknown flag now exits with EXIT_FAILURE (was EXIT_SUCCESS)
 * and a newline-terminated message, matching the cat client's main().
 */
int
main(int argc, char *argv[])
{
	bool has_host_spec { false };
	struct mem_conf mconf;
	struct device_conf dconf;

	ntr_init();

	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}
	argc -= ret;
	argv += ret;

	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		while ((c = getopt(argc, argv, "hvA:H:Jp:")) != -1) {
			switch (c) {
			case 'v':
				ntr_set_level(NTR_DEP_USER1,
				    ntr_get_level(NTR_DEP_USER1) + 1);
				break;
			case 'h':
				usage();
				rte_exit(EXIT_SUCCESS, "\n");
			case 'A':
				cpulist_to_cpuset(optarg, &options.cpu_set);
				options.num_threads = CPU_COUNT(
				    &options.cpu_set);
				if (options.num_threads == 0) {
					rte_exit(EXIT_FAILURE,
					    "must run at least one thread\n");
				}
				break;
			case 'H':
				if (str_to_netspec(optarg,
					&options.s_host_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid host spec\n");
				}
				has_host_spec = true;
				break;
			case 'J':
				options.jumbo_frame_enabled = true;
				options.port_mtu = MAX_JUMBO_MTU;
				break;
			case 'p':
				options.portid = atoi(optarg);
				break;
			default:
				usage();
				rte_exit(EXIT_FAILURE,
				    "unknown argument: %c\n", c);
			}
		}
	}

	if (!has_host_spec) {
		rte_exit(EXIT_FAILURE, "Must specify host spec\n");
	}

	// init libtopo
	if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
	    0) {
		rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
	}

	// init libnms
	if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) {
		rte_exit(EXIT_FAILURE, "libnms init failed!\n");
	}

	dump_options();

	// register the dynamic field carrying the per-mbuf probe-valid flag
	struct rte_mbuf_dynfield rte_mbuf_dynfield_probe_flag = {
		.name = "rte_mbuf_dynfield_probe_valid",
		.size = sizeof(bool),
		.align = __alignof__(uint32_t),
		.flags = 0
	};
	options.probe_state_offset = rte_mbuf_dynfield_register(
	    &rte_mbuf_dynfield_probe_flag);
	if (options.probe_state_offset == -1) {
		rte_exit(EXIT_FAILURE, "failed to register dynamic field: %d\n",
		    rte_errno);
	}

	// configure memory and port
	struct port_conf pconf;
	portconf_get(options.portid, &pconf);
	if (!pconf.timesync) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "main: timesync disabled. hw timestamp unavailable.\n ");
		options.s_hwtimestamp = false;
	}
	dconf.mtu = options.port_mtu;
	CPU_COPY(&options.cpu_set, &dconf.core_affinity);
	dconf.portid = options.portid;
	dconf.rss_hf = pconf.rss_hf;
	dconf.rx_offloads = pconf.rxoffload;
	dconf.tx_offloads = pconf.txoffload;
	dconf.timesync = pconf.timesync;
	dconf.rx_fn = rx_add_timestamp;
	dconf.rx_user = nullptr;
	dconf.rx_ring_sz = 2048;
	dconf.tx_fn = tx_add_timestamp;
	dconf.tx_user = nullptr;
	dconf.tx_ring_sz = 2048;

	mconf.cache_size = 512;
	mconf.priv_size = 0;
	// enough mbufs to back all rings on every lcore, split per socket
	mconf.num_elements = (dconf.rx_ring_sz + dconf.tx_ring_sz) *
	    rte_lcore_count() / rte_socket_count();
	mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_JUMBO_MTU -
	    MAX_STANDARD_MTU;
	mconf.max_pools = -1;
	dpdk_init(&dconf, &mconf);

	if (rte_eth_macaddr_get(options.portid,
	    &options.s_host_spec.mac_addr) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
		    options.portid);
	}

	// init threads: one thread_info per core, allocated on the core's
	// local NUMA node
	uint32_t cpu_idx = CPU_FFS(&options.cpu_set);
	uint32_t tid = 0;
	while (cpu_idx != 0) {
		uint32_t lcore_id = cpu_idx - 1;
		uint32_t node_id = rte_lcore_to_socket_id(lcore_id);
		auto *tinfo = (struct thread_info *)nms_malloc(node_id,
		    sizeof(struct thread_info));
		tinfo->cache_lines = nms_malloc(node_id,
		    CACHELINE_SIZE * options.thread_cacheline_cnt);
		tinfo->load_buffer = nms_malloc(node_id,
		    THREAD_LOAD_BUFFER_SZ);
		tinfo->tid = tid;
		tinfo->lcore_id = lcore_id;
		tinfo->node_id = node_id;
		tinfo->rxqid = tid;
		tinfo->txqid = tid;
		options.s_thr_info.push_back(tinfo);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: thread %d assigned to cpu %d, node %d\n", tinfo->tid,
		    tinfo->lcore_id, topo_core_to_numa(lcore_id));
		tid++;
		CPU_CLR(cpu_idx - 1, &options.cpu_set);
		cpu_idx = CPU_FFS(&options.cpu_set);
	}

	sleep(INIT_DELAY);

	for (int i = 0; i < options.num_threads; i++) {
		struct thread_info *tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: launching thread %d on locore %d\n", tinfo->tid,
		    tinfo->lcore_id);
		if (rte_eal_remote_launch(locore_main,
		    (void *)options.s_thr_info.at(i),
		    tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE,
			    "failed to launch function on locore %d\n",
			    tinfo->lcore_id);
		}
	}

	// workers poll forever; the main thread just sleeps
	while (true) {
		usleep(S2US);
	}

	// shouldn't get here
	// clean up
	for (int i = 0; i < options.num_threads; i++) {
		struct thread_info *tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: waiting for locore %d...\n", tinfo->lcore_id);
		if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n",
			    tinfo->lcore_id);
		}
	}
	dpdk_cleanup(&dconf);
	return 0;
}

View File

@ -1,204 +0,0 @@
#include "net/netsup.hh"
#include <cstdlib>
#include "rte_build_config.h"
#include "rte_common.h"
#include "rte_config.h"
#include "rte_ether.h"
#include "rte_lcore.h"
#include "rte_mempool.h"
#include "rte_mbuf.h"
#include "rte_errno.h"
#include "rte_ethdev.h"
#include "ntr.h"
// One pktmbuf pool per NUMA node, indexed by node id (the 1:1
// index-to-id mapping is enforced in dpdk_init()); g_mempool_sz counts
// how many pools have been created.
static struct rte_mempool *g_mempools[MAX_NUMA_NODES] = {nullptr};
static unsigned int g_mempool_sz = 0;
/*
 * Create one pktmbuf pool per detected NUMA node using the sizing
 * parameters in mconf; exits the process if any pool cannot be created.
 */
static void
mempool_init(struct mem_conf *mconf)
{
	char name[64];

	for (uint32_t node = 0; node < rte_socket_count(); node++) {
		snprintf(name, sizeof(name), "net_mempool_%d", node);
		struct rte_mempool *pool = rte_pktmbuf_pool_create(name,
		    mconf->num_elements, mconf->cache_size, mconf->priv_size,
		    mconf->data_room_size, node);
		if (pool == nullptr) {
			rte_exit(EXIT_FAILURE, "cannot create mbuf pool: %d\n", rte_errno);
		}
		g_mempools[node] = pool;
		g_mempool_sz++;
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "mempool_init: created mempool for node %d\n", node);
	}
}
/*
 * Return the pktmbuf pool for a NUMA node, or nullptr when the node id
 * is out of range (negative ids wrap to large unsigned values and are
 * rejected by the same bound check).
 */
struct rte_mempool *
mempool_get(int nodeid)
{
	return ((unsigned int)nodeid < g_mempool_sz) ? g_mempools[nodeid] :
						       nullptr;
}
/*
 * Configure and start one Ethernet port: device-level config (MTU, RSS,
 * offloads), one rx+tx queue pair per core in dconf->core_affinity with
 * each rx queue backed by the core's NUMA-local mempool, optional
 * timesync, promiscuous mode, and the optional per-queue rx/tx
 * callbacks. Exits the process on any failure.
 */
static void
port_init(struct device_conf *dconf)
{
	struct rte_ether_addr addr;
	struct rte_eth_dev_info dev_info {
	};
	struct rte_eth_conf port_conf;
	struct rte_eth_txconf txconf {
	};
	struct rte_eth_rxconf rxconf {
	};
	int ret;

	int num_threads = CPU_COUNT(&dconf->core_affinity);

	if (rte_eth_dev_count_avail() == 0) {
		rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
	}
	if (!rte_eth_dev_is_valid_port(dconf->portid)) {
		rte_exit(EXIT_FAILURE, "cannot find port %d\n", dconf->portid);
	}
	if ((ret = rte_eth_macaddr_get(dconf->portid, &addr)) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port: %d\n", ret);
	}
	ret = rte_eth_dev_info_get(dconf->portid, &dev_info);
	if (ret != 0) {
		rte_exit(EXIT_FAILURE, "failed to get dev info: %d", ret);
	}
	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
	port_conf.rxmode.mtu = dconf->mtu;
	port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
	port_conf.rx_adv_conf.rss_conf.rss_key = nullptr;
	port_conf.rx_adv_conf.rss_conf.rss_hf = dconf->rss_hf;
	port_conf.rxmode.offloads = dconf->rx_offloads;
	port_conf.txmode.offloads = dconf->tx_offloads;

	/* Configure the Ethernet device: one rx + one tx queue per thread. */
	ret = rte_eth_dev_configure(dconf->portid, num_threads, num_threads, &port_conf);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "failed to configure port: %d\n", ret);

	// let the driver clamp the requested ring sizes to supported values
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(dconf->portid, &dconf->rx_ring_sz, &dconf->tx_ring_sz);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "failed to set rx tx queue size: %d\n", ret);

	/* Allocate and set up 1 RX queue per thread per Ethernet port. */
	rxconf = dev_info.default_rxconf;
	rxconf.offloads = port_conf.rxmode.offloads;
	rxconf.rx_nseg = 0;
	rxconf.rx_seg = nullptr;
	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	int core;
	int qid = 0;
	// queue ids are assigned in core-iteration order
	CPU_FOREACH_ISSET(core, &dconf->core_affinity) {
		int socket = rte_lcore_to_socket_id(core);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "port_init: setting up rx & tx queue for core %d (socket %d)...\n", core, socket);
		ret = rte_eth_rx_queue_setup(dconf->portid, qid, dconf->rx_ring_sz, socket, &rxconf, mempool_get(socket));
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "failed to setup rx queue for core %d: %d\n", core, ret);
		ret = rte_eth_tx_queue_setup(dconf->portid, qid, dconf->tx_ring_sz, socket, &txconf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "failed to setup tx queue for core %d: %d", core, ret);
		qid++;
	}

	// set mtu
	ret = rte_eth_dev_set_mtu(dconf->portid, dconf->mtu);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "failed to set mtu: %d\n", ret);

	ret = rte_eth_dev_start(dconf->portid);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "failed to start port: %d\n", ret);

	if (dconf->timesync) {
		ret = rte_eth_timesync_enable(dconf->portid);
		if (ret != 0)
			rte_exit(EXIT_FAILURE, "failed to enable timesync: %d\n", ret);
	}

	/* Enable RX in promiscuous mode for the Ethernet device. */
	ret = rte_eth_promiscuous_enable(dconf->portid);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "failed to enable promiscuous mode: %d\n", ret);

	// attach the tx/rx callbacks to every queue (queue id == thread idx)
	for (int i = 0; i < num_threads; i++) {
		if (dconf->tx_fn != nullptr) {
			if (rte_eth_add_tx_callback(dconf->portid, i, dconf->tx_fn, dconf->tx_user) == nullptr) {
				rte_exit(EXIT_FAILURE, "failed to attach callback to tx queue %d\n", i);
			}
		}
		if (dconf->rx_fn != nullptr) {
			if (rte_eth_add_rx_callback(dconf->portid, i, dconf->rx_fn, dconf->rx_user) == nullptr) {
				rte_exit(EXIT_FAILURE, "failed to attach callback to rx queue %d\n", i);
			}
		}
	}
	// sync_port_clock(portid);
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "port_init: configured port %d on socket %d with mac addr %x:%x:%x:%x:%x:%x\n",
	    dconf->portid, rte_eth_dev_socket_id(dconf->portid),
	    addr.addr_bytes[0],
	    addr.addr_bytes[1],
	    addr.addr_bytes[2],
	    addr.addr_bytes[3],
	    addr.addr_bytes[4],
	    addr.addr_bytes[5]);
}
/*
 * Bring up shared DPDK state: validate that socket index i maps to
 * socket id i for every node (the mempool array relies on this 1:1
 * mapping), then create the per-node mempools and configure the port.
 */
void
dpdk_init(struct device_conf *dconf, struct mem_conf *mconf)
{
	if (rte_socket_count() > (int)MAX_NUMA_NODES) {
		rte_exit(EXIT_FAILURE, "too many numa nodes\n");
	}

	// ensure 1-1 mapping
	for (int idx = 0; idx < (int)rte_socket_count(); idx++) {
		int sid = rte_socket_id_by_idx(idx);
		if (sid != idx) {
			rte_exit(EXIT_FAILURE, "socket %d has id %d instead.\n", idx, sid);
		}
	}

	mempool_init(mconf);
	port_init(dconf);
}
/*
 * dpdk_cleanup: stop and close the configured port, then release every
 * per-socket mempool created by mempool_init.
 */
void
dpdk_cleanup(struct device_conf * dconf)
{
	const auto port = dconf->portid;

	rte_eth_dev_stop(port);
	rte_eth_dev_close(port);

	const int nsockets = (int)rte_socket_count();
	for (int s = 0; s < nsockets; s++) {
		rte_mempool_free(g_mempools[s]);
	}
}

View File

@ -1,66 +0,0 @@
#include "rte_ethdev.h"
#include "net/netsup.hh"
#include <cstdlib>
/*
 * Per-driver port configuration table, matched by PMD driver name in
 * portconf_get(). Each entry lists the rx/tx offload flags, the RSS hash
 * fields, and whether hardware timesync (PTP timestamping) is enabled for
 * that driver.
 */
static struct port_conf port_confs[] = {
	// Chelsio Terminator (cxgbe): no hw timesync
	{
		.driver_name = "net_cxgbe",
		.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
		.txoffload = RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
		.rss_hf = RTE_ETH_RSS_UDP | RTE_ETH_RSS_FRAG_IPV4,
		.timesync = false
	},
	// Intel 700-series (i40e): fast mbuf free, no hw timesync
	{
		.driver_name = "net_i40e",
		.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
		.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
		.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
		.timesync = false
	},
	// Intel 800-series (ice): supports rx timestamp offload
	{
		.driver_name = "net_ice",
		.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_TIMESTAMP,
		.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
		.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
		.timesync = false
	},
	// Intel 82599/X5xx (ixgbe): hw timesync enabled
	{
		.driver_name = "net_ixgbe",
		.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
		.txoffload = RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
		.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP,
		.timesync = true
	}
};
/*
 * Fallback configuration used when no table entry matches the driver.
 * Optimistically enables every offload this code can use; a driver that
 * rejects one of them will fail later at configure time.
 */
static struct port_conf default_conf = {
	.driver_name = "default",
	.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_TIMESTAMP,
	.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
	.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
	.timesync = true
};
// number of entries in port_confs
static const int port_size = sizeof(port_confs) / sizeof(port_confs[0]);
/*
 * portconf_get: look up the driver-specific configuration for portid.
 *
 * On a driver-name match, copies the table entry into *out and returns 0.
 * If the driver is unknown, copies default_conf into *out and returns -1
 * (the caller still gets a usable configuration either way).
 * Aborts the process if the device info for portid cannot be obtained.
 */
int
portconf_get(int portid, struct port_conf * out)
{
	struct rte_eth_dev_info dev_info {};
	if (rte_eth_dev_info_get(portid, &dev_info) != 0) {
		rte_exit(EXIT_FAILURE, "failed to obtain device info for port %d\n", portid);
	}

	for (auto &known : port_confs) {
		if (strcmp(known.driver_name, dev_info.driver_name) != 0) {
			continue;
		}
		memcpy(out, &known, sizeof(struct port_conf));
		return 0;
	}

	// no match: fall back to the permissive default configuration
	fprintf(stdout, "portconf_get: unable to find matching conf for port %d:%s, returning default conf.\n", portid, dev_info.driver_name);
	memcpy(out, &default_conf, sizeof(struct port_conf));
	return -1;
}

View File

@ -1,909 +0,0 @@
#include <atomic>
#include <cstddef>
#include <cstdlib>
#include <list>
#include <map>
#include <mutex>
#include <random>
#include <vector>
#include <sys/endian.h>
#include <topo.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <unistd.h>
#include "ntr.h"
#include "gen.hh"
#include "net/netsup.hh"
#include "net/pkt.hh"
#include "nms.h"
// Max packets handled per rte_eth_rx_burst / rte_eth_tx_burst call.
constexpr static unsigned int BURST_SIZE = 32;
/*
 * epoch_mk: pack a thread id (upper 8 bits) and a per-thread sequence
 * number (lower 24 bits) into one epoch word.
 *
 * The sequence number is masked to 24 bits so that a counter growing past
 * 0x00FFFFFF wraps cleanly instead of bleeding into (and corrupting) the
 * id field — the caller in pkt_loop increments its counter without bound,
 * and epoch_get_id() on a corrupted word would route the response to the
 * wrong thread.
 */
static unsigned int
epoch_mk(unsigned int id, unsigned int epoch)
{
	return (id << 24) | (epoch & 0x00FFFFFFu);
}
/* epoch_get_id: recover the thread id stored in the top 8 bits of an epoch word. */
static unsigned int
epoch_get_id(unsigned int epoch)
{
	const unsigned int id_bits = epoch & 0xFF000000u;
	return id_bits >> 24;
}
/* epoch_get_epoch: recover the 24-bit sequence number from an epoch word. */
static unsigned int
epoch_get_epoch(unsigned int epoch)
{
	// keep only the low 24 bits (equivalent to `epoch & 0x00FFFFFF`)
	return epoch % 0x01000000u;
}
/* One in-flight (or just-received) probe, identified by its epoch word. */
struct epoch_info {
	unsigned int epoch; // packed id + sequence number, see epoch_mk()
	uint64_t ts;        // timestamp (ns) when this epoch was sent/received
};
/*
 * Per-worker-thread state. One instance per lcore; rx/tx queue ids match
 * the thread id (1:1 queue mapping set up in main()).
 */
struct thread_info {
	unsigned int id { 0 };       // logical thread id (also encoded in epochs)
	unsigned int lcore_id { 0 }; // DPDK lcore this thread runs on
	unsigned int rxqid { 0 };    // rx queue owned by this thread
	unsigned int txqid { 0 };    // tx queue owned by this thread
	int socket_id;               // NUMA socket of lcore_id (selects mempool)
	// this field is read by the stat collecting thread
	std::atomic<int> recved_pkts { 0 };
	std::atomic<int> lost_pkts { 0 };
	Generator *ia_gen { nullptr };    // inter-arrival time generator
	Generator *load_gen0 { nullptr }; // workload arg0 generator
	Generator *load_gen1 { nullptr }; // workload arg1 generator
	std::mutex
	    mtx; // this lock protects data shared between worker threads, i.e.:
	std::list<struct epoch_info *> recved_epochs;
	thread_info() = default;
};
/* Global run-state machine values stored in options.s_state. */
constexpr static int STATE_SYNC = 0; // waiting for SYNC
constexpr static int STATE_SYNC_ACK = 1; // Waiting for sending SYNC_ACK
constexpr static int STATE_RUNNING = 2; // Running
constexpr static int STATE_FIN = 3; // FIN received
// number of generator-driven workload arguments (-w may repeat this many times)
constexpr static int WORKLOAD_MAX_ARGS = 2;
/*
 * Process-wide configuration (set once during argument parsing) plus the
 * shared runtime state (s_-prefixed fields, some of them atomics touched
 * by every worker thread).
 */
struct options_t {
	unsigned int run_time { 5 }; // seconds to run in non-slave mode
	// parameters
	int slave_mode { 0 };        // 1 = wait for SYNC/FIN from the master ("cat")
	uint32_t rage_quit_time { UINT32_MAX }; // ms without any response before abort
	char ia_gen[256] { "fixed:0" };         // inter-arrival distribution spec
	char load_gen[WORKLOAD_MAX_ARGS][256] = {{"fixed:0"}, {"fixed:0"}}; // workload arg distributions
	uint32_t workload_type {LOAD_TYPE_CPU};
	uint32_t target_qps { 0 };   // aggregate target; split evenly across threads
	uint32_t depth { 1 };        // max packets in flight per thread
	struct net_spec server_spec { };
	cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 1 thread @ core 2
	uint32_t pkt_loss_delay_ms { UINT32_MAX };    // per-epoch timeout before declaring loss
	bool jumbo_frame_enabled { false };
	int pkt_pad_sz { 0 };        // pad LOAD packets up to this size
	int port_mtu { MAX_STANDARD_MTU };
	int portid { 0 };
	// states
	unsigned int s_num_threads { 1 }; // 1 thread
	struct net_spec s_host_spec { };
	struct net_spec s_master_spec { };   // filled in from the SYNC packet in slave mode
	struct conn_spec s_master_cspec {
		.src = &s_host_spec, .src_port = DEFAULT_RAT_PORT,
		.dst = &s_master_spec, .dst_port = DEFAULT_RAT_PORT,
	};
	std::vector<struct thread_info *> s_thr_info;
	std::atomic<int> s_state { STATE_RUNNING }; // default non master mode
	// states for qps
	std::atomic<uint64_t> s_ts_begin { 0 }; // ns timestamp when load generation started
};
// single global instance shared by all threads
static struct options_t options;
/*
 * calc_stats: aggregate the per-thread receive/loss counters and derive
 * the overall QPS over the window [options.s_ts_begin, now].
 * Any of the output pointers may be nullptr to skip that statistic.
 */
static inline void
calc_stats(uint64_t now, uint32_t *qps, uint32_t *recved_pkt,
    uint32_t *total_loss)
{
	uint32_t total_recv = 0;
	uint32_t total_lost = 0;

	for (auto *thr : options.s_thr_info) {
		total_recv += thr->recved_pkts.load();
		total_lost += thr->lost_pkts.load();
	}

	if (recved_pkt != nullptr)
		*recved_pkt = total_recv;
	if (total_loss != nullptr)
		*total_loss = total_lost;
	if (qps != nullptr) {
		const double elapsed_sec =
		    (double)(now - options.s_ts_begin.load()) / (double)S2NS;
		*qps = (uint32_t)((double)total_recv / elapsed_sec);
	}
}
/*
 * proto_loop: slave-mode handshake. Polls the rx queue until a SYNC packet
 * arrives from the master ("cat"), records the master's net spec, replies
 * with SYNC_ACK, and advances the global state machine
 * STATE_SYNC -> STATE_SYNC_ACK -> STATE_RUNNING (releasing all workers).
 * Runs on every worker thread; the compare_exchange ensures only one
 * thread performs the transition.
 */
static void
proto_loop(struct thread_info *tinfo)
{
	struct rte_mbuf *tx_buf;
	struct rte_mbuf *rx_bufs[BURST_SIZE];
	struct pkt_hdr *pkt_data;
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "proto_loop <thread %d>: waiting for SYNC from cat\n", tinfo->id);
	while (options.s_state.load() == STATE_SYNC) {
		const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
		    tinfo->rxqid, rx_bufs, BURST_SIZE);
		if (nb_rx > 0) {
			for (int i = 0; i < nb_rx; i++) {
				struct pkt_hdr *each = check_valid_packet(
				    rx_bufs[i], &options.s_host_spec.mac_addr);
				if (each != nullptr) {
					uint16_t type = rte_be_to_cpu_16(
					    each->type);
					if (type == PKT_TYPE_SYNC) {
						int expected = STATE_SYNC;
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_INFO,
						    "proto_loop <thread %d>: received SYNC from cat\n",
						    tinfo->id);
						if (!options.s_state
							 .compare_exchange_strong(
							     expected,
							     STATE_SYNC_ACK)) {
							// someone barged in,
							// listen to that guy
							ntr(NTR_DEP_USER1,
							    NTR_LEVEL_WARNING,
							    "proto_loop <thread %d>: failed to cmpxchg sync_recv.\n",
							    tinfo->id);
						} else {
							// learn the master's address from the SYNC packet
							pkt_hdr_to_netspec(each,
							    &options
								 .s_master_spec,
							    nullptr, nullptr,
							    nullptr);
							if (alloc_pkt_hdr(
								mempool_get(
								    tinfo
									->socket_id),
								PKT_TYPE_SYNC_ACK,
								&options
								     .s_master_cspec,
								0, &tx_buf,
								&pkt_data) !=
							    0) {
								rte_exit(
								    EXIT_FAILURE,
								    "failed to alloc pkt hdr\n");
							}
							tx_burst_all(
							    options.portid,
							    tinfo->txqid,
							    &tx_buf, 1);
							expected =
							    STATE_SYNC_ACK;
							// we've done our job,
							// set off the threads
							if (!options.s_state
								 .compare_exchange_strong(
								     expected,
								     STATE_RUNNING)) {
								rte_exit(
								    EXIT_FAILURE,
								    "state unexpectedly changed\n");
							}
							ntr(NTR_DEP_USER1,
							    NTR_LEVEL_INFO,
							    "proto_loop <thread %d>: sent SYNC_ACK to cat\n",
							    tinfo->id);
						}
					} else {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_DEBUG,
						    "proto_loop <thread %d>: ignoring invalid packet %p type %d.\n",
						    tinfo->id,
						    (void *)rx_bufs[i], type);
					}
				} else {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "proto_loop <thread %d>: ignoring invalid packet %p.\n",
					    tinfo->id, (void *)rx_bufs[i]);
					//dump_pkt(rx_bufs[i]);
				}
				// rx mbufs are always returned to the pool here
				rte_pktmbuf_free(rx_bufs[i]);
			}
		}
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
	    "proto_loop <thread %d>: exiting loop...\n", tinfo->id);
}
/*
 * pkt_loop: per-thread load-generation loop, run while the global state is
 * STATE_RUNNING. Each iteration:
 *   1. drains the rx queue: LOAD_RESP epochs are routed (via mutex-guarded
 *      list) to the thread that sent them; a FIN from the master triggers
 *      a stats reply (FIN_ACK) and the transition to STATE_FIN;
 *   2. dequeues epochs other threads routed to us and matches them against
 *      our in-flight set (sent_epochs), counting received packets;
 *   3. expires in-flight epochs older than pkt_loss_delay_ms as lost;
 *   4. paced by ia_gen and bounded by options.depth, allocates and sends
 *      new LOAD packets, randomizing src/dst ports for RSS spread;
 *   5. rage-quits the whole process if nothing has been received for
 *      options.rage_quit_time ms.
 */
static void
pkt_loop(struct thread_info *tinfo)
{
	struct rte_mbuf *tx_bufs[BURST_SIZE];
	struct rte_mbuf *rx_bufs[BURST_SIZE];
	// epochs drained from tinfo->recved_epochs, processed locally below
	std::vector<struct epoch_info *> recved_epochs;
	// epochs this thread sent that still await a response, keyed by epoch word
	std::map<unsigned int, struct epoch_info *> sent_epochs;
	uint64_t cur_epoch = 0;
	uint64_t next_ts;
	uint64_t last_recv_ts = 0;
	struct conn_spec srv_cspec;
	rdport_generator src_port_gen(MIN_RANDOM_PORT);
	rdport_generator dst_port_gen(MIN_RANDOM_PORT);
	srv_cspec.src = &options.s_host_spec;
	srv_cspec.dst = &options.server_spec;
	next_ts = topo_uptime_ns();
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "pkt_loop <thread %d>: entering\n",
	    tinfo->id);
	while (options.s_state.load() == STATE_RUNNING) {
		uint64_t now = topo_uptime_ns();
		// always pop incoming packets
		const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
		    tinfo->rxqid, rx_bufs, BURST_SIZE);
		if (nb_rx > 0) {
			for (int i = 0; i < nb_rx; i++) {
				struct pkt_hdr *each = check_valid_packet(
				    rx_bufs[i], &options.s_host_spec.mac_addr);
				if (each == nullptr) {
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "pkt_loop <thread %d>: ignoring invalid packet %p.\n",
					    tinfo->id, (void *)rx_bufs[i]);
					rte_pktmbuf_free(rx_bufs[i]);
					continue;
				}
				uint16_t type = rte_be_to_cpu_16(each->type);
				NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, each,
				    "locore_main <thread %d>: ", tinfo->id);
				struct pkt_payload_epoch *pld_epoch;
				struct epoch_info *einfo;
				uint32_t epoch;
				uint32_t id;
				struct thread_info *other_t;
				int int_expected = STATE_RUNNING;
				switch (type) {
				case PKT_TYPE_LOAD_RESP:
					pld_epoch = (struct pkt_payload_epoch *)
							each->payload;
					epoch = rte_be_to_cpu_32(
					    pld_epoch->epoch);
					id = epoch_get_id(epoch);
					// printf("Load resp size : %d\n",
					// rx_bufs[i]->data_len);
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "pkt_loop <thread %d>: packet %p epoch 0x%x id %d.\n",
					    tinfo->id, (void *)rx_bufs[i],
					    epoch, id);
					if (id >= options.s_num_threads) {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "pkt_loop <thread %d>: packet %p invalid id %d.\n",
						    tinfo->id,
						    (void *)rx_bufs[i], id);
						break;
					}
					// RSS may deliver the response to a
					// different thread than the sender:
					// hand the epoch to its owner
					einfo = new struct epoch_info;
					einfo->epoch = epoch;
					einfo->ts = now;
					other_t = options.s_thr_info.at(id);
					other_t->mtx.lock();
					other_t->recved_epochs.push_back(einfo);
					other_t->mtx.unlock();
					break;
				case PKT_TYPE_FIN:
					// only honor FIN from the master's MAC
					if (rte_is_same_ether_addr(
						&each->eth_hdr.src_addr,
						&options.s_master_spec
						     .mac_addr)) {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_DEBUG,
						    "pkt_loop <thread %d>: recved FIN from cat.\n",
						    tinfo->id);
						// master told us to stop!
						if (!options.s_state
							 .compare_exchange_strong(
							     int_expected,
							     STATE_FIN)) {
							ntr(NTR_DEP_USER1,
							    NTR_LEVEL_WARNING,
							    "pkt_loop <thread %d>: failed to cmpxchg state.\n",
							    tinfo->id);
						}
						uint32_t qps;
						uint32_t total_recv;
						uint32_t total_loss;
						calc_stats(now, &qps,
						    &total_recv, &total_loss);
						struct pkt_hdr *pkt_hdr;
						if (alloc_pkt_hdr(
							mempool_get(
							    tinfo->socket_id),
							PKT_TYPE_FIN_ACK,
							&options.s_master_cspec,
							0, &tx_bufs[0],
							&pkt_hdr) != 0) {
							rte_exit(EXIT_FAILURE,
							    "failed to allocate pkt hdr\n");
						}
						// reply with the final stats
						auto pld_qps =
						    (struct pkt_payload_qps *)
							pkt_hdr->payload;
						pld_qps->qps = rte_cpu_to_be_32(
						    qps);
						pld_qps->recved_pkts =
						    rte_cpu_to_be_32(
							total_recv);
						pld_qps->lost_pkts =
						    rte_cpu_to_be_32(
							total_loss);
						tx_burst_all(options.portid,
						    tinfo->txqid, &tx_bufs[0],
						    1);
						options.s_state.store(
						    STATE_FIN);
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_DEBUG,
						    "pkt_loop <thread %d>: sent FIN_ACK to cat. QPS = %d.\n",
						    tinfo->id, qps);
					} else {
						ntr(NTR_DEP_USER1,
						    NTR_LEVEL_WARNING,
						    "pkt_loop <thread %d>: invalid FIN packet from a different cat.\n",
						    tinfo->id);
					}
					break;
				default:
					ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
					    "pkt_loop: ignoring packet %p with unknown type %d.\n",
					    (void *)rx_bufs[i], type);
				}
				rte_pktmbuf_free(rx_bufs[i]);
			}
		}
		// dequeue receved epochs
		struct epoch_info *einfo;
		tinfo->mtx.lock();
		while (!tinfo->recved_epochs.empty()) {
			// only dequeue, process later
			einfo = tinfo->recved_epochs.front();
			tinfo->recved_epochs.pop_front();
			// XXX: might call into the allocator
			// otherwise we need to have an array and do batching
			// => complex code and don't think it's worth it
			recved_epochs.push_back(einfo);
		}
		tinfo->mtx.unlock();
		if (!recved_epochs.empty())
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "pkt_loop <thread %d>: dequeued %lu received epochs\n",
			    tinfo->id, recved_epochs.size());
		// process epochs
		while (!recved_epochs.empty()) {
			einfo = recved_epochs.back();
			recved_epochs.pop_back();
			auto it = sent_epochs.find(einfo->epoch);
			if (it != sent_epochs.end()) {
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "pkt_loop <thread %d>: received epoch 0x%x\n",
				    tinfo->id, epoch_get_epoch(einfo->epoch));
				if (einfo->ts > last_recv_ts) {
					last_recv_ts = einfo->ts;
				}
				delete it->second;
				sent_epochs.erase(it);
				tinfo->recved_pkts.fetch_add(1);
			} else {
				// we recved an epoch we never sent
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "pkt_loop <thread %d>: received epoch 0x%x but never sent it. Packet loss?\n",
				    tinfo->id, einfo->epoch);
			}
			delete einfo;
		}
		// handle packet loss
		for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
			einfo = it->second;
			if (now - einfo->ts >
			    options.pkt_loss_delay_ms * MS2NS) {
				// timed out
				ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
				    "pkt_loop <thread %d>: epoch 0x%x is lost after not receiving for too long\n",
				    tinfo->id, einfo->epoch);
				delete it->second;
				it = sent_epochs.erase(it);
				tinfo->lost_pkts.fetch_add(1);
			} else {
				++it;
			}
		}
		// check to send the next packet
		uint32_t total_send = 0;
		while (now >= next_ts && sent_epochs.size() < options.depth &&
		    total_send < BURST_SIZE) {
			struct pkt_payload_load *pld_load;
			struct pkt_hdr *pkt_data;
			// pace ourselves according to the inter-arrival distribution
			next_ts += (int)(tinfo->ia_gen->generate() * S2NS);
			// change dst port for every packet for RSS
			srv_cspec.dst_port = dst_port_gen.next();
			srv_cspec.src_port = src_port_gen.next();
			if (alloc_pkt_hdr(mempool_get(tinfo->socket_id),
				PKT_TYPE_LOAD, &srv_cspec, options.pkt_pad_sz,
				&tx_bufs[total_send], &pkt_data) != 0) {
				rte_exit(EXIT_FAILURE,
				    "failed to allocate pkt hdr\n");
			}
			pld_load = (struct pkt_payload_load *)pkt_data->payload;
			pld_load->type = rte_cpu_to_be_32(options.workload_type);
			pld_load->arg0 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen0->generate());
			pld_load->arg1 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen1->generate());
			unsigned int epoch = epoch_mk(tinfo->id, cur_epoch);
			pld_load->epoch = rte_cpu_to_be_32(epoch);
			cur_epoch++;
			einfo = new struct epoch_info;
			einfo->epoch = epoch;
			einfo->ts = now;
			sent_epochs.insert({ epoch, einfo });
			ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
			    "pkt_loop <thread %d>: sending packet %p with epoch 0x%x\n",
			    tinfo->id, (void *)tx_bufs[total_send], epoch);
			total_send++;
		}
		tx_burst_all(options.portid, tinfo->txqid, tx_bufs, total_send);
		// check rage quit only when we have sent a packet
		if (last_recv_ts == 0) {
			last_recv_ts = topo_uptime_ns();
		}
		if (topo_uptime_ns() >
		    options.rage_quit_time * MS2NS + last_recv_ts) {
			rte_exit(EXIT_FAILURE,
			    "rat: thread %d waiting too long for resp. I F QUIT!\n",
			    tinfo->id);
		}
	}
	// clean up any epochs still in flight when the loop exits
	for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
		delete it->second;
		++it;
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
	    "pkt_loop <thread %d>: exiting loop...\n", tinfo->id);
}
/*
 * locore_main: entry point for each worker lcore (launched via
 * rte_eal_remote_launch). Warns about cross-NUMA polling, runs the
 * slave-mode handshake if requested, waits for STATE_RUNNING, then runs
 * the load-generation loop until FIN.
 */
static int
locore_main(void *tif)
{
	auto tinfo = (struct thread_info *)tif;
	uint32_t core_id = rte_lcore_id();
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "locore_main <thread %d>: running on core %d rxqid %d txqid %d...\n", tinfo->id,
	    core_id, tinfo->rxqid, tinfo->txqid);
	// performance note: polling a port attached to a remote NUMA node is legal but slow
	if (rte_eth_dev_socket_id(options.portid) > 0 &&
	    rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
		    "polling thread.\n\tPerformance will "
		    "not be optimal.\n",
		    tinfo->id, options.portid);
	}
	if (options.slave_mode == 1) {
		// perform rat protocol
		proto_loop(tinfo);
	}
	// wait for the primary thread sending SYNC_ACK
	while (options.s_state.load() != STATE_RUNNING) {
	}
	// store the current timestamp
	options.s_ts_begin.store(topo_uptime_ns());
	pkt_loop(tinfo);
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: exited\n",
	    tinfo->id);
	return 0;
}
/*
 * dump_options: log the effective configuration at INFO level after
 * argument parsing.
 *
 * Fix: the rage-quit field used "%ul", which printf parses as "%u"
 * followed by a literal 'l' — the trailing 'l' ended up in the output.
 * rage_quit_time is a uint32_t, so plain "%u" is the correct specifier.
 */
static void
dump_options()
{
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "Configuration:\n"
	    " verbosity = +%d\n"
	    " run time = %d\n"
	    " num threads = %d\n"
	    " rage quit time = %u\n"
	    " slave mode = %d\n"
	    " interarrival dist = %s\n"
	    " workload type = %d\n"
	    " workload arg0 = %s\n"
	    " workload arg1 = %s\n"
	    " qps = %d\n"
	    " host IP = 0x%x\n"
	    " depth = %u\n"
	    " packet loss time threshold = %u\n"
	    " jumbo frame = %d\n"
	    " packet pad size = %d\n"
	    " portid = %d\n",
	    ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time,
	    options.s_num_threads, options.rage_quit_time, options.slave_mode,
	    options.ia_gen, options.workload_type, options.load_gen[0], options.load_gen[1], options.target_qps,
	    options.s_host_spec.ip, options.depth, options.pkt_loss_delay_ms,
	    options.jumbo_frame_enabled, options.pkt_pad_sz, options.portid);
}
/*
 * usage: print the command-line help text to stdout. Note that -w is
 * positional: the first occurrence is the workload type, the second and
 * third are the arg0/arg1 distributions.
 */
static void
usage()
{
	fprintf(stdout,
	    "Usage:\n"
	    " -v(vv): verbose mode\n"
	    " -h: display the information\n"
	    " -t: run time\n"
	    " -s: server net spec\n"
	    " -S: slave(rat) mode\n"
	    " -A: affinity mask\n"
	    " -i: inter-arrival time distribution\n"
	    " -w: workload type\n"
	    " -w (repeated): workload arg0 distribution\n"
	    " -w (repeated): workload arg1 distribution\n"
	    " -r: rage quit time (in ms)\n"
	    " -q: target QPS\n"
	    " -H: host net spec\n"
	    " -D: max number of packets in flight\n"
	    " -l: packet loss time threshold\n"
	    " -J: enable jumbo frame\n"
	    " -P: pad load packets to this size\n"
	    " -p: portid\n");
}
/*
 * main: rat (load generator) entry point.
 * Initializes EAL/ntr/libtopo/libnms, parses options, configures the port
 * and per-socket mempools, creates one thread_info per CPU in the affinity
 * mask (1:1 thread/queue mapping), launches the workers, runs a coarse
 * one-second timer (non-slave mode) until FIN, then joins the workers,
 * prints aggregate stats and cleans up.
 */
int
main(int argc, char *argv[])
{
	struct thread_info *tinfo;
	bool has_host_spec = false;
	ntr_init();
	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}
	// skip past the EAL arguments consumed above
	argc -= ret;
	argv += ret;
	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// tracks how many -w options were seen (type, arg0, arg1)
		int num_of_ws = 0;
		// parse arguments
		while ((c = getopt(argc, argv,
			    "vht:s:SA:i:w:r:q:H:D:l:JP:p:")) != -1) {
			switch (c) {
			case 'v':
				ntr_set_level(NTR_DEP_USER1,
				    ntr_get_level(NTR_DEP_USER1) + 1);
				break;
			case 'h':
				usage();
				rte_exit(EXIT_SUCCESS, "\n");
			case 't':
				options.run_time = strtol(optarg, nullptr, 10);
				break;
			case 's':
				if (str_to_netspec(optarg,
					&options.server_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid server net spec\n");
				}
				break;
			case 'S':
				options.slave_mode = 1;
				options.s_state =
				    STATE_SYNC; // set state to wait for SYNC
				break;
			case 'A':
				cpulist_to_cpuset(optarg, &options.cpu_set);
				options.s_num_threads = CPU_COUNT(
				    &options.cpu_set);
				if (options.s_num_threads == 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid cpu mask %s\n", optarg);
				}
				break;
			case 'i':
				strncpy(options.ia_gen, optarg,
				    sizeof(options.ia_gen) - 1);
				break;
			case 'w':
				// first -w is the workload type, later ones are arg distributions
				if (num_of_ws == 0) {
					options.workload_type = strtol(optarg, NULL, 10);
					if (options.workload_type >= LOAD_TYPE_MAX) {
						rte_exit(EXIT_FAILURE,
						    "invalid workload type %s\n", optarg);
					}
				} else if (num_of_ws <= WORKLOAD_MAX_ARGS) {
					strncpy(options.load_gen[num_of_ws - 1], optarg, 255);
				}
				num_of_ws++;
				break;
			case 'r':
				options.rage_quit_time = strtol(optarg, nullptr,
				    10);
				break;
			case 'q':
				options.target_qps = strtol(optarg, nullptr,
				    10);
				break;
			case 'H':
				has_host_spec = true;
				if (str_to_netspec(optarg,
					&options.s_host_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid host net spec.\n");
				}
				break;
			case 'D':
				// 0 means "unlimited" in-flight depth
				options.depth = strtol(optarg, nullptr, 10);
				if (options.depth == 0) {
					options.depth = UINT32_MAX;
				}
				break;
			case 'l':
				// 0 means "never declare loss"
				options.pkt_loss_delay_ms = strtol(optarg,
				    nullptr, 10);
				if (options.pkt_loss_delay_ms == 0) {
					options.pkt_loss_delay_ms = UINT32_MAX;
				}
				break;
			case 'J':
				options.jumbo_frame_enabled = true;
				options.port_mtu = MAX_JUMBO_MTU;
				break;
			case 'P':
				options.pkt_pad_sz = strtol(optarg, nullptr,
				    10);
				break;
			case 'p':
				options.portid = strtol(optarg, nullptr, 10);
				break;
			default:
				usage();
				rte_exit(EXIT_FAILURE, "unknown argument: %c\n",
				    c);
			}
		}
	}
	// sanity checks on the parsed options
	if (options.pkt_pad_sz != 0 &&
	    options.pkt_pad_sz > mtu_to_pkt_size(options.port_mtu)) {
		rte_exit(EXIT_FAILURE, "pkt_pad_sz is too large for mtu %d\n",
		    options.port_mtu);
	}
	if (!has_host_spec) {
		rte_exit(EXIT_FAILURE, "Must specify host IP.\n");
	}
	// init libtopo
	if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
	    0) {
		rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
	}
	if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
	    0) {
		rte_exit(EXIT_FAILURE, "libnms init failed!\n");
	}
	dump_options();
	// configure memory and port
	struct port_conf pconf;
	struct device_conf dconf;
	struct mem_conf mconf;
	portconf_get(options.portid, &pconf);
	if (!pconf.timesync) {
		ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
		    "main: timesync disabled. hw timestamp unavailable.\n ");
	}
	dconf.mtu = options.port_mtu;
	CPU_COPY(&options.cpu_set, &dconf.core_affinity);
	dconf.portid = options.portid;
	dconf.rss_hf = pconf.rss_hf;
	dconf.rx_offloads = pconf.rxoffload;
	dconf.tx_offloads = pconf.txoffload;
	dconf.timesync = pconf.timesync;
	dconf.rx_fn = nullptr;
	dconf.rx_user = nullptr;
	dconf.rx_ring_sz = 2048;
	dconf.tx_fn = nullptr;
	dconf.tx_user = nullptr;
	dconf.tx_ring_sz = 2048;
	mconf.cache_size = 512;
	mconf.priv_size = 0;
	// size each per-socket pool to cover all rings of its local lcores
	mconf.num_elements = (dconf.rx_ring_sz + dconf.tx_ring_sz) *
	    rte_lcore_count() / rte_socket_count();
	// leave headroom for jumbo frames beyond the default buffer size
	mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_JUMBO_MTU -
	    MAX_STANDARD_MTU;
	mconf.max_pools = -1;
	dpdk_init(&dconf, &mconf);
	if (rte_eth_macaddr_get(options.portid,
		&options.s_host_spec.mac_addr) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
		    options.portid);
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n",
	    options.portid, options.s_host_spec.mac_addr.addr_bytes[0],
	    options.s_host_spec.mac_addr.addr_bytes[1],
	    options.s_host_spec.mac_addr.addr_bytes[2],
	    options.s_host_spec.mac_addr.addr_bytes[3],
	    options.s_host_spec.mac_addr.addr_bytes[4],
	    options.s_host_spec.mac_addr.addr_bytes[5]);
	// create one thread_info per CPU in the affinity mask (consumes the set)
	unsigned int cpuset_idx = CPU_FFS(&options.cpu_set);
	unsigned int tid = 0;
	while (cpuset_idx != 0) {
		unsigned int lcore_id = cpuset_idx - 1;
		tinfo = new thread_info;
		tinfo->ia_gen = createGenerator(options.ia_gen);
		tinfo->load_gen0 = createGenerator(options.load_gen[0]);
		tinfo->load_gen1 = createGenerator(options.load_gen[1]);
		if (tinfo->ia_gen == nullptr || tinfo->load_gen0 == nullptr || tinfo->load_gen1 == nullptr) {
			rte_exit(EXIT_FAILURE,
			    "invalid ia_gen or ld_gen string\n");
		}
		// split the aggregate target QPS evenly across threads
		tinfo->ia_gen->set_lambda((double)options.target_qps /
		    (double)(options.s_num_threads));
		tinfo->id = tid;
		tinfo->lcore_id = lcore_id;
		tinfo->socket_id = rte_lcore_to_socket_id(lcore_id);
		tinfo->rxqid = tid;
		tinfo->txqid = tid;
		options.s_thr_info.push_back(tinfo);
		tid++;
		CPU_CLR(lcore_id, &options.cpu_set);
		cpuset_idx = CPU_FFS(&options.cpu_set);
	}
	sleep(INIT_DELAY);
	for (unsigned int i = 0; i < options.s_num_threads; i++) {
		tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: launching thread %d on locore %d\n", tinfo->id,
		    tinfo->lcore_id);
		if (rte_eal_remote_launch(locore_main,
			(void *)options.s_thr_info.at(i),
			tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE,
			    "failed to launch function on locore %d\n",
			    tinfo->lcore_id);
		}
	}
	// poor man's timer
	uint32_t second = 0;
	// this loop exit is signaled by SYNC_FIN in slave mode and by itself in
	// non slave mode
	while (options.s_state.load() != STATE_FIN) {
		if (options.slave_mode != 1) {
			if (second >= options.run_time) {
				options.s_state.store(STATE_FIN);
				break;
			}
			usleep(1 * S2US);
			second++;
		}
	}
	for (unsigned int i = 0; i < options.s_num_threads; i++) {
		tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: waiting for locore %d...\n", tinfo->lcore_id);
		if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n",
			    tinfo->lcore_id);
		}
	}
	// aggregate and report the final statistics
	uint32_t qps;
	uint32_t total_recv;
	uint32_t total_loss;
	calc_stats(topo_uptime_ns(), &qps, &total_recv, &total_loss);
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "qps = %d, recv = %d, loss = %d\n",
	    qps, total_recv, total_loss);
	for (auto each : options.s_thr_info) {
		delete each->load_gen0;
		delete each->load_gen1;
		delete each->ia_gen;
		delete each;
	}
	// clean up
	dpdk_cleanup(&dconf);
	return 0;
}

544
rat/rat.cc Normal file
View File

@ -0,0 +1,544 @@
#include <cstdio>
#include <ctime>
#include <netinet/in.h>
#include <rte_config.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <atomic>
#include <vector>
#include <fstream>
#include <unistd.h>
#include "nm.h"
#include "gen.h"
#include "ntr.h"
#include "pkt.h"
#include "util.h"
// mbuf pool sizing
constexpr static unsigned int MBUF_MAX_COUNT = 16384;
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
// rx/tx descriptor ring sizes requested from the NIC
constexpr static unsigned int RX_RING_SIZE = 4096;
constexpr static unsigned int TX_RING_SIZE = 4096;
// max packets handled per rx/tx burst call
constexpr static unsigned int BURST_SIZE = 32;
// operating modes
constexpr static unsigned int MODE_MASTER = 0;
constexpr static unsigned int MODE_CLIENT = 1;
// zero-initialized baseline port configuration, copied and customized in port_init()
static const struct rte_eth_conf port_conf_default{};
/*
 * One complete timestamp sample for a single probe/response exchange.
 * All timestamp fields appear to be nanoseconds (hw values are built as
 * ts.tv_nsec + ts.tv_sec * S2NS) — TODO confirm units for the sw fields.
 */
struct datapt {
	uint32_t epoch; // probe sequence number this sample belongs to
	uint32_t valid; // snapshot of options.s_record when the probe was sent
	uint64_t clt_hw_tx; // client NIC tx timestamp
	uint64_t clt_sw_tx; // client software tx timestamp
	uint64_t clt_hw_rx; // client NIC rx timestamp
	uint64_t clt_sw_rx; // client software rx timestamp
	uint64_t srv_hw_tx; // server NIC tx timestamp (from PKT_TYPE_STAT)
	uint64_t srv_sw_tx; // server software tx timestamp (from PKT_TYPE_STAT)
	uint64_t srv_hw_rx; // server NIC rx timestamp (from PKT_TYPE_STAT)
	uint64_t srv_sw_rx; // server software rx timestamp (from PKT_TYPE_STAT)
};
/* Per-worker state for the ptp/timestamp measurement loop. */
struct thread_info {
	unsigned int id;          // logical thread id
	unsigned int rxqid{0};    // rx queue owned by this thread
	unsigned int txqid{0};    // tx queue owned by this thread
	std::vector<struct datapt *> data; // completed samples
	struct datapt * last_datapt{nullptr}; // sample currently being filled
	unsigned int tot_send{0};
	unsigned int tot_recv{0};
	Generator * ia_gen;       // inter-arrival time generator
};
/*
 * Process-wide configuration (set during argument parsing) plus shared
 * runtime state (s_-prefixed fields).
 */
struct options_t {
	unsigned int run_time{5};     // measured run time (seconds)
	unsigned int warmup_time{0};  // warmup before recording (seconds)
	unsigned int num_threads{1};
	unsigned int mode{MODE_MASTER};
	char output[256] = "output.txt"; // result file path
	char ia_gen[256] = "fixed:1";    // inter-arrival distribution spec
	struct rte_ether_addr server_mac;
	uint64_t cpu_mask;
	// states
	struct rte_mempool * mbuf_pool;
	struct rte_ether_addr s_host_mac; // local port MAC
	uint16_t s_portid;
	std::vector<struct thread_info *> s_thr_info;
	std::atomic<uint32_t> s_epoch;    // global probe sequence counter
	std::atomic<bool> s_stop {false}; // set to terminate the worker loops
	std::atomic<uint32_t> s_record {0}; // nonzero while samples count as valid
};
// single global instance shared by all threads
static struct options_t options;
// static struct thread_info * get_thread_info(int qid)
// {
// return options.s_thr_info.at(qid);
// }
static int
locore_main(void * tif)
{
struct thread_info * tinfo = (struct thread_info *)tif;
struct rte_mbuf *tx_buf;
struct rte_mbuf *rx_bufs[BURST_SIZE];
struct pkt_hdr *pkt_data;
uint32_t core_id = rte_lcore_id();
int32_t ret;
bool read_tx = true;
bool recv_stat = true;
bool recv_resp = true;
uint64_t next_ts;
// XXX: check link status instead
sleep(1);
if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: WARNING, port %d is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", options.s_portid);
}
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d running thread %d...\n", core_id, tinfo->id);
next_ts = get_time_us();
while(!options.s_stop.load()) {
uint64_t now = get_time_us();
// always pop incoming packets
const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, 0, rx_bufs, BURST_SIZE);
if (nb_rx > 0) {
for (int i = 0; i < nb_rx; i++) {
struct pkt_hdr * each = check_valid_packet(rx_bufs[i]);
if (each == NULL) {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: ignoring invalid packet %p.\n", (void*)rx_bufs[i]);
rte_pktmbuf_free(rx_bufs[i]);
continue;
}
uint16_t type = rte_be_to_cpu_16(each->type);
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: received packet %p type %d.\n", (void*)rx_bufs[i], type);
switch (type) {
struct pkt_payload_epoch * pld_epoch;
struct pkt_payload_stat * pld_stat;
uint32_t epoch;
case PKT_TYPE_PROBE_RESP:
pld_epoch = (struct pkt_payload_epoch *)each->payload;
epoch = rte_be_to_cpu_32(pld_epoch->epoch);
if (tinfo->last_datapt == nullptr || epoch != tinfo->last_datapt->epoch) {
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, tinfo->last_datapt->epoch);
break;
}
tinfo->tot_recv++;
recv_resp = true;
break;
case PKT_TYPE_STAT:
pld_stat = (struct pkt_payload_stat *)each->payload;
epoch = rte_be_to_cpu_32(pld_stat->epoch);
if (tinfo->last_datapt == nullptr || epoch != tinfo->last_datapt->epoch) {
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, tinfo->last_datapt->epoch);
break;
}
tinfo->last_datapt->srv_hw_tx = rte_be_to_cpu_64(pld_stat->hw_tx);
tinfo->last_datapt->srv_hw_rx = rte_be_to_cpu_64(pld_stat->hw_rx);
tinfo->last_datapt->srv_sw_tx = rte_be_to_cpu_64(pld_stat->sw_tx);
tinfo->last_datapt->srv_sw_rx = rte_be_to_cpu_64(pld_stat->sw_rx);
tinfo->tot_recv++;
recv_stat = true;
break;
default:
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: ignoring packet %p with unknown type %d.\n", (void*)rx_bufs[i], type);
rte_pktmbuf_free(rx_bufs[i]);
continue;
}
rte_pktmbuf_free(rx_bufs[i]);
}
}
if (read_tx && recv_stat & recv_resp) {
// if we have all the data
if (tinfo->last_datapt != nullptr) {
// push the data to the queue if we haven't done so already
tinfo->data.push_back(tinfo->last_datapt);
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: datapt for epoch %d dump:\n" \
" Valid: %d\n"
" client TX HW: %llu\n" \
" client TX SW: %llu\n" \
" client RX HW: %llu\n" \
" client RX SW: %llu\n" \
" server TX HW: %llu\n" \
" server TX SW: %llu\n" \
" server RX HW: %llu\n" \
" server RX SW: %llu\n\n",
tinfo->last_datapt->epoch,
tinfo->last_datapt->valid,
tinfo->last_datapt->clt_hw_tx,
tinfo->last_datapt->clt_sw_tx,
tinfo->last_datapt->clt_hw_rx,
tinfo->last_datapt->clt_sw_rx,
tinfo->last_datapt->srv_hw_tx,
tinfo->last_datapt->srv_sw_tx,
tinfo->last_datapt->srv_hw_rx,
tinfo->last_datapt->srv_sw_rx);
tinfo->last_datapt = nullptr;
}
if (now >= next_ts) {
struct pkt_payload_epoch * pld_epoch;
uint32_t epoch;
next_ts += (int)(tinfo->ia_gen->generate() * 1000000.0);
// generate the packet
tx_buf = rte_pktmbuf_alloc(options.mbuf_pool);
if (tx_buf == NULL) {
rte_exit(EXIT_FAILURE, "cannot allocate tx_buf\n");
}
pkt_data = construct_pkt_hdr(tx_buf, PKT_TYPE_PROBE,
&options.s_host_mac, &options.server_mac);
if (pkt_data == NULL) {
rte_exit(EXIT_FAILURE, "cannot allocate space for packet_data in mbuf\n");
}
epoch = options.s_epoch.fetch_add(1);
pld_epoch = (struct pkt_payload_epoch *)pkt_data->payload;
pld_epoch->epoch = rte_cpu_to_be_32(epoch);
tinfo->last_datapt = new struct datapt;
tinfo->last_datapt->epoch = epoch;
tinfo->last_datapt->valid = options.s_record.load();
read_tx = false;
recv_resp = false;
recv_stat = false;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: sending packet %p with epoch %d\n", (void*)tx_buf, epoch);
const uint16_t nb_tx = rte_eth_tx_burst(options.s_portid, tinfo->txqid, &tx_buf, 1);
if (nb_tx != 1) {
rte_exit(EXIT_FAILURE, "failed to send packet 0x%p, epoch %d\n", (void*)tx_buf, epoch);
}
}
}
if (!read_tx) {
struct timespec ts;
if ((ret = rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts)) == 0) {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: read hw tx timestamp %lld.\n", ts.tv_nsec + ts.tv_sec * S2NS);
tinfo->last_datapt->clt_hw_tx = ts.tv_nsec + ts.tv_sec * S2NS;
read_tx = true;
}
}
}
rte_pktmbuf_free(tx_buf);
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d successfully stopped.\n", core_id);
return 0;
}
/*
 * port_init: configure and start Ethernet port `portid` with one rx and
 * one tx queue per worker thread, checksum offloads, and promiscuous mode.
 * Returns 0 on success, -1 for an invalid port, or the negative rte errno
 * of the failing call.
 *
 * Fix: the original assigned port_conf.rxmode.max_rx_pkt_len twice (once
 * before the validity check and again after rte_eth_dev_info_get); the
 * redundant first assignment is removed.
 */
static int
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf = port_conf_default;
	struct rte_eth_txconf txconf;
	struct rte_eth_rxconf rxconf;
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;
	if(!rte_eth_dev_is_valid_port(portid)) {
		return -1;
	}
	int ret = rte_eth_dev_info_get(portid, &dev_info);
	if (ret != 0) {
		return ret;
	}
	port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
	// request rx/tx checksum offloads and fast mbuf free
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
	/* Configure the Ethernet device. */
	ret = rte_eth_dev_configure(portid, options.num_threads, options.num_threads, &port_conf);
	if (ret != 0)
		return ret;
	// let the driver clamp the requested descriptor counts to its limits
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;
	/* Allocate and set up 1 RX queue per thread . */
	rxconf = dev_info.default_rxconf;
	rxconf.offloads = port_conf.rxmode.offloads;
	for (uint32_t i = 0; i < options.num_threads; i++) {
		ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
		if (ret < 0)
			return ret;
	}
	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	/* Allocate and set up 1 TX queue per Ethernet port. */
	for (uint32_t i = 0; i < options.num_threads; i++) {
		ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
		if (ret < 0)
			return ret;
	}
	ret = rte_eth_dev_start(portid);
	if (ret < 0)
		return ret;
	/* Display the port MAC address. */
	struct rte_ether_addr addr;
	ret = rte_eth_macaddr_get(portid, &addr);
	if (ret != 0)
		return ret;
	/* Enable RX in promiscuous mode for the Ethernet device. */
	ret = rte_eth_promiscuous_enable(portid);
	if (ret != 0)
		return ret;
	return 0;
}
static void dump_options()
{
fprintf(stdout, "Configuration:\n" \
" run time = %d\n" \
" warmup time = %d\n" \
" output file = %s\n" \
" server MAC = %x:%x:%x:%x:%x:%x\n",
options.run_time,
options.warmup_time,
options.output,
options.server_mac.addr_bytes[0],
options.server_mac.addr_bytes[1],
options.server_mac.addr_bytes[2],
options.server_mac.addr_bytes[3],
options.server_mac.addr_bytes[4],
options.server_mac.addr_bytes[5]);
}
static void usage()
{
fprintf(stdout,
"Usage:\n " \
" -v(vv): verbose mode\n" \
" -h: display the information\n" \
" -o: output filename\n" \
" -t: run time\n" \
" -T: warmup time\n" \
" -s: server's mac\n" \
" -A: affinity mask\n" \
" -a: number of threads\n" \
" -C: client mode\n"
" -i: inter-arrival time distribution\n\n");
}
// static void int_handler(int)
// {
// //rte_exit(EXIT_SUCCESS, "Caught SIGINT, exiting...\n");
// }
// Entry point: initialize DPDK/EAL, parse program options, set up the NIC
// port and a single worker lcore, run the timed measurement, then dump the
// collected timestamp records to the log file (master mode only).
int main(int argc, char* argv[])
{
	unsigned int nb_ports;
	struct rte_mempool *mbuf_pool;
	std::ofstream log_file;
	struct thread_info *tinfo;
	ntr_init();
	if (nm_init() != 0)
		rte_exit(EXIT_FAILURE, "failed to init libnm\n");
	// signal(SIGINT, int_handler);
	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}
	// EAL consumed `ret` arguments; shift past them to our own options
	argc -= ret;
	argv += ret;
	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		while((c = getopt(argc, argv, "hvo:t:T:s:A:a:Ci:")) != -1) {
			switch (c) {
				case 'v':
					// each -v raises the log verbosity by one level
					ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
					break;
				case 's':
					if (rte_ether_unformat_addr(optarg, &options.server_mac) == -1) {
						rte_exit(EXIT_FAILURE, "cannot parse %s as mac address.\n", optarg);
					}
					break;
				case 't':
					options.run_time = atoi(optarg);
					break;
				case 'T':
					options.warmup_time = atoi(optarg);
					break;
				case 'h':
					// intentional fallthrough into exit via rte_exit
					usage();
					rte_exit(EXIT_SUCCESS, "success\n");
				case 'o':
					strncpy(options.output, optarg, sizeof(options.output) - 1);
					break;
				case 'A':
					options.cpu_mask = atoll(optarg);
					break;
				case 'a':
					options.num_threads = atoi(optarg);
					break;
				case 'C':
					options.mode = MODE_CLIENT;
					break;
				case 'i':
					// name of the inter-arrival time generator, e.g. "exponential"
					strncpy(options.ia_gen, optarg, sizeof(options.ia_gen) - 1);
					break;
				default:
					usage();
					rte_exit(EXIT_FAILURE, "unknown argument: %c\n", c);
					break;
			}
		}
	}
	// open log file for writing
	// NOTE(review): only the master writes results; in client mode log_file
	// stays unopened and the unconditional close() below is a no-op.
	if (options.mode == MODE_MASTER) {
		log_file.open(options.output, std::ofstream::out);
		if (!log_file) {
			rte_exit(EXIT_FAILURE, "failed to open log file %s\n", options.output);
		}
	}
	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0) {
		rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
	}
	// use the first available Ethernet port
	uint16_t portid = rte_eth_find_next(0);
	if (portid == RTE_MAX_ETHPORTS) {
		rte_exit(EXIT_FAILURE, "cannot find an available port\n");
	}
	options.s_portid = portid;
	// create a mbuf memory pool on the socket
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(options.s_portid));
	if (mbuf_pool == nullptr) {
		rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
	}
	options.mbuf_pool = mbuf_pool;
	// currently only a single worker thread is created (loop bound is 1)
	for(int i = 0; i < 1; i++) {
		tinfo = new thread_info;
		tinfo->id = i;
		tinfo->ia_gen = createGenerator(options.ia_gen);
		options.s_thr_info.push_back(tinfo);
	}
	if (port_init(portid, mbuf_pool) != 0) {
		rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
	}
	if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
	}
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n", portid,
		options.s_host_mac.addr_bytes[0],
		options.s_host_mac.addr_bytes[1],
		options.s_host_mac.addr_bytes[2],
		options.s_host_mac.addr_bytes[3],
		options.s_host_mac.addr_bytes[4],
		options.s_host_mac.addr_bytes[5]);
	dump_options();
	// give the port a moment to settle before launching the worker
	sleep(1);
	uint16_t core_id = rte_get_next_lcore(0, true, false);
	if (rte_eal_remote_launch(locore_main, options.s_thr_info.at(0), core_id) != 0) {
		rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
	}
	// poor man's timer
	// XXX: use kqueue instead
	// tick once per second: enable recording after warmup, stop after
	// warmup + run time.
	struct timespec ts;
	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	uint32_t second = 0;
	while(true) {
		if (second >= options.warmup_time) {
			options.s_record.store(1);
		}
		if (second >= options.run_time + options.warmup_time) {
			options.s_stop.store(true);
			break;
		}
		clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL);
		second++;
	}
	if (rte_eal_wait_lcore(core_id) < 0)
		rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");
	// dump stats
	// one CSV row per recorded data point; only points marked valid
	// (i.e. collected after warmup) are written.
	if (options.mode == MODE_MASTER) {
		thread_info * master_thrd = options.s_thr_info.at(0);
		for (auto it : master_thrd->data) {
			if (it->valid) {
				log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
						 << it->clt_hw_rx << ',' << it->clt_hw_tx << ','
						 << it->srv_sw_rx << ',' << it->srv_sw_tx << ','
						 << it->srv_hw_rx << ',' << it->srv_hw_tx << std::endl;
			}
		}
	}
	log_file.close();
	// clean up
	rte_eth_dev_stop(portid);
	rte_eth_dev_close(portid);
	return 0;
}

View File

@ -1,50 +0,0 @@
import os
import sys
import getopt
import subprocess

# Pin each t6nex NIC queue interrupt to its own CPU core.
#
#   -b <base>   first core to use (default 0)
#   -s <stride> core step between consecutive irqs (default 2)
#   -d <num>    t6nex device number (default 0)
#   -p <port>   port number on the device (default 0)
options = getopt.getopt(sys.argv[1:], 'b:s:d:p:')[0]
base = 0
stride = 2
num = 0
port = 0
for opt, arg in options:
    if opt == '-b':
        base = int(arg)
    elif opt == '-s':
        stride = int(arg)
    elif opt == '-d':
        num = int(arg)
    elif opt == '-p':
        port = int(arg)

# Grab every sysctl line that mentions this device/port's interrupts.
result = subprocess.run("sysctl -a", shell=True, check=True,
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
lines = result.stdout.decode().split('\n')
cclines: list[str] = []
for line in lines:
    if ("irq" in line) and (f"t6nex{num}" in line) and (f"{port}a" in line):
        cclines.append(line)
if len(cclines) == 0:
    print(f"No t6nex {num}a lines from sysctl.\n")
    exit(1)

# Extract the numeric irq from the leading "irqNNN:" token of each line.
irqs = []
for line in cclines:
    eles = line.split(' ')
    irq = eles[0]
    if (irq.startswith("irq") and irq.endswith(":")):
        irq = irq[3:-1]
        irqs.append(int(irq))
    else:
        # BUG FIX: message previously printed a literal "f{line}" because of
        # a stray 'f' inside the f-string.
        print(f"Unknown line format: {line}")

print(f"Detected {len(irqs)} irqs:\n{str(irqs)}")
# Bind each irq to a core, stepping by `stride` cores per irq.
for irq in irqs:
    print(f"Setting irq{irq}'s affinity to core {base}...")
    subprocess.run(f"cpuset -l {base} -x {irq}", check=True, shell=True)
    base = base + stride
exit(0)

38
scripts/compile.sh Executable file
View File

@ -0,0 +1,38 @@
#!/bin/sh
# Sync the source tree to each build server and compile it there, in parallel.
test_dir="/numam.d"
root=".."
servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca"
rsync_flags="-vchr"
ssh_args="-o StrictHostKeyChecking=no -p77"

# First argument overrides the remote user; default to the invoking user.
# BUG FIX: quote "$user" so the test is well-formed when $1 is unset/spacey.
user=$1
if [ -z "$user" ]
then
	user=$(whoami)
fi
echo "USER: $user"

compile() {
	# separate these functions because we might change kernel (reboot) without needing to recompile
	echo "====================$1===================="
	echo "Syncing directories..."
	# $ssh_args intentionally unquoted: it must word-split into ssh options
	ssh $ssh_args "$user@$1" "sudo mkdir -p $test_dir"
	ssh $ssh_args "$user@$1" "sudo chmod 777 $test_dir"
	rsync $rsync_flags -e 'ssh -p 77' "$root/" "$user@$1:$test_dir/"
	echo "Compiling..."
	ssh $ssh_args "$user@$1" "mkdir -p $test_dir/build; cd $test_dir/build; cmake ../; make clean all -j8" &
	wait
	echo "$1 Done."
	echo ""
}

i=0
for server in $servers
do
	i=$(expr $i + 1)
	compile "$server" &
done
wait

View File

@ -1,9 +0,0 @@
#!/bin/sh
# Copy the mount helper scripts to each test machine's home directory.
# Same file/host order as the original explicit scp list.
for host in icelake1-int.rcs.uwaterloo.ca milan1-int.rcs.uwaterloo.ca \
            icelake2-int.rcs.uwaterloo.ca milan2-int.rcs.uwaterloo.ca
do
	for f in mount.sh mount_small.sh
	do
		scp -P77 "$f" "oscar@$host:~/"
	done
done

View File

@ -1,230 +0,0 @@
from cgi import test
from site import abs_paths
import subprocess as sp
import time
import select
import os
import datetime
import pwd
import sys
import getopt
import numpy as np
import re
import libpar as par
import libtc as tc
import libmechspec as mechspec
import netexp
only_max_qps = True
# [[counter names], counting mode (0 = sampling, 1 = counting)]
pmc_counters = [
"",
# [["mem_load_l3_miss_retired.local_dram"], 1],
# [["mem_load_l3_miss_retired.remote_dram"], 1],
# [["mem_load_l3_miss_retired.remote_hitm"], 1],
# [["mem_load_l3_miss_retired.remote_fwd"], 1]
# [["mem_trans_retired.load_latency_gt_8"], 0],
# [["mem_trans_retired.load_latency_gt_16"], 0],
# [["mem_trans_retired.load_latency_gt_32"], 0],
# [["mem_trans_retired.load_latency_gt_64"], 0],
# [["mem_trans_retired.load_latency_gt_128"], 0],
# [["mem_trans_retired.load_latency_gt_256"], 0],
# [["mem_trans_retired.load_latency_gt_512"], 0],
#[["mem_trans_retired.load_latency_gt_8", ""], 0],
]
# pkt_pad
clt_pkt_pads = [
0,
# 256,
# 512,
# 1024,
# 2048,
# 4096,
# 8192
]
clt_pkt_pads_depth = {}
clt_pkt_pads_depth[0] = 8
clt_pkt_pads_depth[256] = 6
clt_pkt_pads_depth[512] = 6
clt_pkt_pads_depth[1024] = 4
clt_pkt_pads_depth[1518] = 4
clt_pkt_pads_depth[2048] = 2
clt_pkt_pads_depth[4096] = 2
clt_pkt_pads_depth[8192] = 1
clt_pkt_pads_depth[9018] = 1
# clt_load
clt_wrkld = [
[0, "fixed:0", "fixed:0"],
# [0, "uniform:1000", "fixed:0"],
# [0, "uniform:100", "fixed:0"],
# [0, "uniform:10", "fixed:0"],
# [1, "uniform:480", "uniform:1024"],
# [1, "uniform:480", "uniform:256"],
# [1, "uniform:480", "uniform:64"]
]
# paths
file_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(file_dir,"..")
# [srv_affinity, OPTIONAL( memgen_affinity, iteration, buffer_size, target_dom )]
server_affinity = [
["1,3,5,7,9,11,13,15,17,19,21,23"],
["25,27,29,31,33,35,37,39,41,43,45,47"],
#["1,3,5,7,9,11,13,15,17,19,21,23", "26,28,30,32,34,36,38,40,42,44,46", -1, 512*1024*1024, 0],
#["25,27,29,31,33,35,37,39,41,43,45,47", "2,4,6,8,10,12,14,16,18,20,22", -1, 512*1024*1024, 1],
# "65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127",
# "1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63",
# "1,3,5,7,9,11,13,15",
# "17,19,21,23,25,27,29,31",
# "33,35,37,39,41,43,45,47",
# "49,51,53,55,57,59,61,63"
]
def flush_netresult(conf : netexp.NetExpConf, result : netexp.NetExpResult):
    # Persist one experiment's outputs into the current test output directory
    # and log a latency summary. Files are named after the measured QPS.
    sample_out = tc.get_odir() + "/" + str(result.parser.qps) + ".txt"
    with open(sample_out, "w") as f:
        f.write(result.sample)
    if conf.enable_pmc:
        pmc_out = tc.get_odir() + "/" + str(result.parser.qps) + ".pmc"
        if conf.pmc_mode != 0:
            # counting mode: pmc_parser holds the raw text output
            with open(pmc_out, "w") as f:
                f.write(result.pmc_parser.raw)
        else:
            # sampling mode: pmc_parser is [raw binary, processed text]
            with open(pmc_out, "wb") as f:
                f.write(result.pmc_parser[0])
            with open(pmc_out + "_parsed", "w") as g:
                g.write(result.pmc_parser[1])
    # summary line: packet loss percentages for master and slave clients
    tc.log_print("=== Summary - qps: " + str(result.parser.qps) + " master loss: " + str(float(result.parser.master_loss) / float(result.parser.master_recv + result.parser.master_loss) * 100.00) + "% slave loss: " + str(float(result.parser.slave_loss) / float(result.parser.slave_recv + result.parser.slave_loss) * 100.0) + "%" )
    tc.log_print("=== Server HW:")
    tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_hwlat, [result.parser.qps]) + "\n")
    tc.log_print("=== Server SW:")
    tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_swlat, [result.parser.qps]) + "\n")
    tc.log_print("=== Client HW:")
    tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_hwlat, [result.parser.qps]) + "\n")
    tc.log_print("=== Client SW:")
    tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_swlat, [result.parser.qps]) + "\n")
    if conf.enable_pmc:
        if conf.pmc_mode != 0:
            tc.log_print("=== PMC:")
            tc.log_print("counter: " + result.pmc_parser.counter + " count: " + str(result.pmc_parser.count) + " cores: " + str(result.pmc_parser.cores))
def main():
    """Drive the full experiment sweep.

    Flags: -s stop all remote processes; -c client-only; -S set up bench;
    -D set up dpdk. Otherwise iterate over server affinities x packet pads
    x workloads x PMC counter configs, run each at max QPS, and (unless
    only_max_qps) sweep intermediate QPS values.
    """
    tc.set_ssh_param("-o StrictHostKeyChecking=no -p77")
    tc.set_ssh_user("oscar")
    output_dirname = "run"

    conf = netexp.NetExpConf()
    conf.srv_mechspec = mechspec.LAB.SKYLAKE1_10G
    conf.clt_mechspecs = [mechspec.LAB.SKYLAKE3_10G, mechspec.LAB.SKYLAKE5_10G]
    conf.mst_mechspec = mechspec.LAB.SKYLAKE2_10G
    conf.finalize_mechspecs()
    conf.root_dir = "/numam.d/build/bin"
    # server fixed configs
    conf.srv_port = 0
    # client fixed configs
    conf.clt_ia = "exponential"
    conf.clt_affinity = "1,3,5,7,9,11,13,15,17,19,21,23"
    conf.clt_port = 0
    conf.clt_pkt_loss_lat = 5000
    conf.clt_rage_quit_lat = 5000
    # master fixed configs
    conf.mst_port = 0
    conf.mst_warmup = 5
    conf.mst_duration = 20
    conf.mst_qps = 100
    conf.mst_ia = "exponential"
    conf.mst_pkt_loss_lat = 5000
    conf.mst_pkt_loss_max = 100
    conf.mst_affinity = "2"
    # pmc stuff
    conf.pmc_sampling_rate = 4096
    conf.pmc_counting_interval = 0.1

    options = getopt.getopt(sys.argv[1:], 'scSD')[0]
    for opt, arg in options:
        if opt in ('-s',):
            netexp.stop_all(conf)
            return
        elif opt in ('-c',):
            conf.enable_client_only = True
        elif opt in ('-S',):
            netexp.setup(conf, bench = True, dpdk = False)
            return
        elif opt in ('-D',):
            netexp.setup(conf, bench=False, dpdk=True)
            return

    # timestamped output directory; keep a copy of this script alongside results
    tc.init("~/results.d/numam_neo/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    cpcmd = "cp " + __file__ + " " + tc.get_odir() + "/"
    tc.log_print(cpcmd)
    sp.check_call(cpcmd, shell=True)

    for eaff in server_affinity:
        conf.srv_affinity = eaff[0]
        conf.enable_memgen = False
        if len(eaff) > 1:
            conf.enable_memgen = True
            conf.memgen_affinity = eaff[1]
            conf.memgen_iteration = eaff[2]
            conf.memgen_size = eaff[3]
            conf.memgen_tgtdom = eaff[4]
        for epad in clt_pkt_pads:
            # BUG FIX: was "conf.clt_pkt_pad = 0", which ignored the loop
            # variable and silently ran every pad configuration with pad 0
            # (test_name still claimed the pad was applied).
            conf.clt_pkt_pad = epad
            conf.clt_pkt_depth = clt_pkt_pads_depth[conf.clt_pkt_pad]
            for eload in clt_wrkld:
                conf.clt_wrkld = eload[0]
                conf.clt_wrkarg0 = eload[1]
                conf.clt_wrkarg1 = eload[2]
                for epmc in pmc_counters:
                    conf.enable_pmc = False
                    if len(epmc) > 0:
                        conf.enable_pmc = True
                        conf.pmc_counters = epmc[0]
                        conf.pmc_mode = epmc[1]
                    test_name = "affinity" + eaff[0] + "_pad" + str(epad) + "_load" + str(eload[0]) + "," + str(eload[1]) + "," + str(eload[2])
                    if (conf.enable_memgen):
                        test_name += "_memload" + str(eaff[1]) + "," + str(eaff[2]) + "," + str(eaff[3]) + "," + str(eaff[4])
                    if (conf.enable_pmc):
                        test_name += "_pmc" + str(epmc[1]) + "_" + conf.get_pmc_str()
                    tc.begin(test_name)

                    # first run at unbounded QPS to discover the maximum
                    conf.clt_qps = 0
                    tc.log_print("============ " + test_name + " QPS: MAX ============")
                    result : netexp.NetExpResult = netexp.run(conf)
                    flush_netresult(conf, result)
                    max_qps = result.parser.qps

                    if conf.enable_client_only:
                        return
                    if only_max_qps:
                        continue

                    # sweep ~10 QPS steps up to just below the measured max
                    finish = (int)(max_qps - max(conf.mst_qps, 0.01 * max_qps))
                    step = (int)(finish / 10)
                    cur_qps = step
                    while cur_qps <= finish:
                        tc.log_print("============ " + test_name + " QPS: " + str(cur_qps) + " ============")
                        conf.clt_qps = cur_qps
                        result : netexp.NetExpResult = netexp.run(conf)
                        # BUG FIX: was flush_netresult(result) -- missing the
                        # required `conf` argument, a TypeError at runtime.
                        flush_netresult(conf, result)
                        cur_qps += step

                    tc.log_print("")
                    tc.end()
    netexp.stop_all(conf)


main()

View File

@ -1,132 +0,0 @@
#!/usr/bin/env python3.6
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
import numpy as np
import sys
import re
import os
import json
import libpar as par
import getopt
import math
import concurrent.futures as CF
def process_dir(rootdir):
    """Parse every ``.txt`` sample file directly under ``rootdir``.

    Returns a list of ``par.khat_parser`` objects, one per successfully
    parsed file. Empty and unparsable files are skipped with a message.
    """
    ret = []
    print("Processing directory " + rootdir + " ...")
    for subdir in os.listdir(rootdir):
        each_dir = os.path.join(rootdir, subdir)
        if os.path.isfile(each_dir) and each_dir.endswith(".txt"):
            try:
                # Read the file once (previously it was opened twice:
                # once to count lines and once to read the content).
                with open(each_dir, 'r') as f:
                    output = f.read()
                if len(output.splitlines()) <= 1:
                    print("Skipping empty file - " + each_dir)
                    continue
                parser = par.khat_parser()
                parser.parse(output)
                print("Processed raw data - " + each_dir)
                ret.append(parser)
            except Exception:
                # Was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit; keep the best-effort skip.
                print("Unrecognized format - " + subdir)
    print("")
    return ret
marker_map = ["o", "P", "s", "v", "*", "+", "^", "1", "2", "d", "X", "o", "P", "s", "v", "*", "+", "^", "1", "2", "d", "X"]
color_map = ["xkcd:black", "xkcd:red", "xkcd:blue", "xkcd:green", "xkcd:cyan", "xkcd:purple", "xkcd:orange", "xkcd:salmon", "xkcd:lightgreen", "xkcd:indigo", "xkcd:brown", "xkcd:bubblegum", "xkcd:lavender", "xkcd:maroon", "xkcd:fern", "xkcd:sky", "xkcd:orchid", "xkcd:sienna"]
parser_idx_labels = ["srv_hw", "srv_sw", "clt_hw", "clt_sw"]
def add_curve(eax, label : str, qps_arr : [], lat_arr : [], marker : str, color : str):
    """Plot one latency-vs-QPS curve (sorted by QPS) onto the given axis."""
    frame = pd.DataFrame({'qps': qps_arr, 'lat': lat_arr}).sort_values('qps')
    eax.plot('qps', 'lat', data=frame, label=label, marker=marker, color=color, markersize=8)
# adds curves (avg and 99th percentile) for a specific parser idx
def add_curves(rax, label : str, parsers : [], parser_idx : int, marker : str, color : str):
    """Add average (rax[0]) and 99th-percentile (rax[1]) curves for one stat index."""
    qps_arr, avg_arr, p99_arr = [], [], []
    for parser in parsers:
        samples = list(parser.get_stat_arr(parser_idx))
        qps_arr.append(parser.qps)
        avg_arr.append(np.mean(samples))
        p99_arr.append(np.percentile(samples, 99))
    add_curve(rax[0], label, qps_arr, avg_arr, marker, color)
    add_curve(rax[1], label, qps_arr, p99_arr, marker, color)
# generate the graphs for a parser index
def generate_graph(aff_to_parser : {}, parser_idx : int, fn : str):
    # Render a two-panel (average / 99th percentile) latency-vs-QPS figure
    # for one stat index, with one curve per affinity, and save it to `fn`.
    marker_idx = 0
    color_idx = 0
    fig, rax = plt.subplots(2, 1)
    # log-scale latency axes with plain (non-scientific) tick labels
    rax[0].set_yscale("log")
    rax[0].set_title("Average")
    rax[0].set_xlabel("QPS")
    rax[0].set_ylabel("Latency (ns)")
    rax[0].xaxis.get_major_formatter().set_scientific(False)
    rax[0].yaxis.set_minor_formatter(ticker.ScalarFormatter())
    rax[1].set_yscale("log")
    rax[1].set_title("99th percentile")
    rax[1].set_xlabel("QPS")
    rax[1].set_ylabel("Latency (ns)")
    rax[1].xaxis.get_major_formatter().set_scientific(False)
    rax[1].yaxis.set_minor_formatter(ticker.ScalarFormatter())
    print("Generating graph => " + fn + "...")
    for aff in aff_to_parser:
        # each affinity gets a different marker type
        # NOTE(review): indices are not wrapped -- more affinities than
        # entries in marker_map/color_map would raise IndexError.
        marker_type = marker_map[marker_idx]
        color_type = color_map[color_idx]
        marker_idx += 1
        color_idx += 1
        print(" Processing affinity " + aff + "...")
        add_curves(rax, aff, aff_to_parser[aff], parser_idx, marker_type, color_type)
    rax[0].legend()
    rax[1].legend()
    # A3-ish landscape canvas at 150 dpi
    fig.set_size_inches(23.4, 16.5)
    plt.savefig(fn, dpi=150)
    plt.close()
def main():
    """Parse ``-d <dir>`` and render latency graphs for each subdirectory."""
    datdir = None
    options = getopt.getopt(sys.argv[1:], 'd:')[0]
    for opt, arg in options:
        # BUG FIX (idiom): was `opt in ('-d')`, which is a substring test
        # against the string '-d', not a tuple membership test.
        if opt == '-d':
            datdir = arg
    if datdir is None:
        raise Exception("Must specify -d parameter")
    dat = {}
    # each subdirectory of datdir is one curve family, keyed by its name
    for subdir in os.listdir(datdir):
        each_dir = os.path.join(datdir, subdir)
        if not os.path.isfile(each_dir):
            dat[subdir] = process_dir(each_dir)
    for i in range(len(parser_idx_labels)):
        generate_graph(dat, i, datdir + "/" + parser_idx_labels[i])


if __name__ == "__main__":
    main()

View File

@ -12,7 +12,7 @@ import math
import concurrent.futures as CF
import libpar as par
num_bins = 250
num_bins = 100
extra_pct = []
def saveplot(fp : str, data : [], title : str):
@ -20,6 +20,7 @@ def saveplot(fp : str, data : [], title : str):
plt.xlabel("Delay")
plt.title(title)
plt.ylabel("Frequency")
plt.title(os.path.basename(fp))
f = plt.gcf()
f.set_size_inches(11.69, 8.27)
f.savefig(fp + "_" + title + "_" + ".png", dpi=160)
@ -28,15 +29,6 @@ def saveplot(fp : str, data : [], title : str):
executor = CF.ProcessPoolExecutor(max_workers=int(os.cpu_count()))
def clean_data(dat: []):
ret = []
arr = np.array(dat)
cutoff = np.percentile(arr, 99)
for i in arr:
if i <= cutoff:
ret.append(i)
return ret
def process_file(each_dir):
try:
print("Processing " + each_dir + " ...")
@ -53,28 +45,12 @@ def process_file(each_dir):
ss.append(pt.s_stx - pt.s_srx)
ch.append(pt.c_hrx - pt.c_htx)
cs.append(pt.c_srx - pt.c_stx)
sh = clean_data(sh)
ss = clean_data(ss)
ch = clean_data(ch)
cs = clean_data(cs)
saveplot(each_dir, sh, "server_hw_delay")
saveplot(each_dir, ss, "server_sw_delay")
saveplot(each_dir, ch, "client_hw_delay")
saveplot(each_dir, cs, "client_sw_delay")
# output median, etc.
with open(each_dir + "_" + "stats.txt", 'w') as f:
f.write("===================== SERVER HW ====================\n")
f.write(par.mutilate_data.build_mut_output(sh, [len(sh)]))
f.write("\n===================== SERVER SW ====================\n")
f.write(par.mutilate_data.build_mut_output(ss, [len(ss)]))
f.write("\n===================== CLIENT HW ====================\n")
f.write(par.mutilate_data.build_mut_output(ch, [len(ch)]))
f.write("\n===================== CLIENT SW ====================\n")
f.write(par.mutilate_data.build_mut_output(cs, [len(cs)]))
except Exception:
print("Unexpected error:", sys.exc_info())
@ -82,7 +58,8 @@ def process_dir(rootdir):
for subdir in os.listdir(rootdir):
each_dir = os.path.join(rootdir, subdir)
if os.path.isfile(each_dir):
if each_dir.endswith(".txt") or each_dir.endswith(".sample"):
if each_dir.endswith("sample.txt") or each_dir.endswith(".sample"):
#executor.submit(process_file, each_dir)
process_file(each_dir)
else:
process_dir(each_dir)
@ -96,7 +73,8 @@ def main():
datdir = arg
if datdir == None:
raise Exception("Must specify -d parameter")
datdir = "/home/oscar/projs/kqsched/scripts/pingpong/results.d/sample"
#raise Exception("Must specify -d parameter")
process_dir(datdir)
executor.shutdown()

View File

@ -1,25 +0,0 @@
class NetSpec:
    """One host's network identity: FQDN, IP, MAC, and the "ip@mac" spec string."""

    def __init__(self, fqdn, ip, mac) -> None:
        self.fqdn = fqdn
        self.ip = ip
        self.mac = mac
        # combined form consumed by the benchmark command lines
        self.netspec = f"{ip}@{mac}"
class LabNetSpecs:
    # Static inventory of lab machines and their NIC identities.
    # An empty mac string means the MAC was not recorded for that NIC
    # (presumably unused so far) -- TODO confirm before relying on it.
    def __init__(self) -> None:
        self.SKYLAKE1_10G = NetSpec(fqdn = "skylake1.rcs.uwaterloo.ca",ip = "192.168.123.11", mac = "3c:15:fb:62:9b:28")
        self.SKYLAKE2_10G = NetSpec(fqdn = "skylake2.rcs.uwaterloo.ca",ip = "192.168.123.12", mac = "3c:15:fb:c9:f3:36")
        self.SKYLAKE3_10G = NetSpec(fqdn = "skylake3.rcs.uwaterloo.ca",ip = "192.168.123.13", mac = "3c:15:fb:c9:f3:4b")
        self.SKYLAKE4_10G = NetSpec(fqdn = "skylake4.rcs.uwaterloo.ca",ip = "192.168.123.14", mac = "")
        self.SKYLAKE5_10G = NetSpec(fqdn = "skylake5.rcs.uwaterloo.ca",ip = "192.168.123.15", mac = "3c:15:fb:c9:f3:28")
        self.SKYLAKE6_10G = NetSpec(fqdn = "skylake6.rcs.uwaterloo.ca",ip = "192.168.123.16", mac = "3c:15:fb:62:9b:2f")
        self.SKYLAKE7_10G = NetSpec(fqdn = "skylake7.rcs.uwaterloo.ca",ip = "192.168.123.17", mac = "3c:15:fb:c9:f3:44")
        self.SKYLAKE8_10G = NetSpec(fqdn = "skylake8.rcs.uwaterloo.ca",ip = "192.168.123.18", mac = "3c:15:fb:62:9c:be")
        self.MILAN1_100G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "")
        self.MILAN1_10G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "a0:42:3f:4d:cb:bc")
        self.ICELAKE2_100G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "")
        self.ICELAKE2_10G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "")


# Module-level singleton used by the run scripts (e.g. mechspec.LAB.SKYLAKE1_10G).
LAB = LabNetSpecs()

View File

@ -1,56 +1,6 @@
import json
import numpy as np
class iperf_json_parser:
def __init__(self, inputs):
self.aggregate_egress_bps = 0
self.jsonobjs = []
for input in inputs:
jsobj = json.loads(input)
self.jsonobjs.append(jsobj)
each_bps = jsobj['end']['sum_sent']['bits_per_second']
self.aggregate_egress_bps += each_bps
class memloadgen_parser:
def __init__(self, input, min, max):
lines = input.split('\n')
if max > len(lines):
max = len(lines)
if len(lines) <= min:
raise Exception("Not enough lines!")
if min > max:
min = max
arr = []
for i in range(min, max):
arr.append(int(lines[i]))
self.bps = np.mean(arr)
class pmc_parser:
def __init__(self, input):
self.raw = input
lines = input.split('\n')
if len(lines) < 2:
raise Exception("Invalid pmc file format")
spec = lines[0].strip()
if (spec[0] != '#'):
raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"")
spec = spec.split(' ')
self.cores = len(spec) - 1
elements = spec[1].split('/')
if (len(elements) != 3):
raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"")
self.counter = elements[2].strip()
last_line = lines[-1]
elements = last_line.split(' ')
total = 0
for e in elements:
if (len(e) > 0):
total += int(e)
self.count = total
class khat_parser:
class pt:
def __init__(self):
@ -62,42 +12,13 @@ class khat_parser:
self.c_hrx = 0
self.c_stx = 0
self.c_srx = 0
self.master_total = 0
self.master_loss = 0
self.slave_total = 0
self.slave_loss = 0
self.qps = 0
def __init__(self):
self.datapt = []
self.srv_hwlat = []
self.srv_swlat = []
self.clt_hwlat = []
self.clt_swlat = []
self.lat_idx_arr = []
self.lat_idx_arr.append(self.srv_hwlat)
self.lat_idx_arr.append(self.srv_swlat)
self.lat_idx_arr.append(self.clt_hwlat)
self.lat_idx_arr.append(self.clt_swlat)
def get_stat_arr(self, idx : int):
return self.lat_idx_arr[idx]
def parse(self, output : str):
first = True
for line in output.splitlines():
# the first line is qps
cells = line.split(',')
if (first):
if len(cells) != 5:
raise Exception("Invalid headline:" + line)
self.qps = int(cells[0])
self.master_recv = int(cells[1])
self.master_loss = int(cells[2])
self.slave_recv = int(cells[3])
self.slave_loss = int(cells[4])
first = False
continue
if len(cells) != 8:
raise Exception("Invalid line:" + line)
pt = self.pt()
@ -110,10 +31,6 @@ class khat_parser:
pt.s_hrx = int(cells[6])
pt.s_htx = int(cells[7])
self.datapt.append(pt)
self.srv_hwlat.append(pt.s_htx - pt.s_hrx)
self.srv_swlat.append(pt.s_stx - pt.s_srx)
self.clt_hwlat.append(pt.c_hrx - pt.c_htx)
self.clt_swlat.append(pt.c_srx - pt.c_stx)
class mutilate_data:

View File

@ -23,7 +23,7 @@ tc_test_id = 0
def init(odir = "./results.d/"):
global tc_output_dir
tc_output_dir = odir
tc_output_dir = odir + "_" + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
tc_output_dir = os.path.expanduser(tc_output_dir)
os.system("mkdir -p " + tc_output_dir)
global tc_logfile
@ -40,7 +40,7 @@ def begin(name):
def end():
global tc_cur_test
log_print("\n===== Test #" + str(tc_test_id) + " - " + tc_cur_test + " completed =====")
tc_cur_test = ""
tc_cur_test = None
def get_odir():
return tc_output_dir + "/" + tc_cur_test
@ -65,20 +65,12 @@ def set_ssh_param(para):
global ssh_param
ssh_param = para
def get_ssh_param():
global ssh_param
return ssh_param
ssh_user = None
def set_ssh_user(user):
global ssh_user
ssh_user = user
def get_ssh_user():
global ssh_user
return ssh_user
def remote_exec(srv : list[str], cmd : str, blocking=True, check=True) -> sp.Popen:
def remote_exec(srv, cmd, blocking=True, check=True):
sub = []
for s in srv:
p = sp.Popen(["ssh " + ssh_param + " " + ((ssh_user + "@") if ssh_user != None else "") + s + " \"" + cmd +"\""], shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
@ -93,27 +85,28 @@ def remote_exec(srv : list[str], cmd : str, blocking=True, check=True) -> sp.Pop
return sub
def check_stderr(p, sel, exclude = []):# -> tuple[bool, list[str]]:
max_stderr_rd = 10
err = []
while sel.poll(1) and max_stderr_rd > 0:
err.append(p.stderr.readline().decode().strip())
max_stderr_rd = max_stderr_rd - 1
def scan_stderr(p, exclude = None):
for err in p.stderr:
fail = True
err = err.decode()
err = err.strip()
# print(err)
good = True
for e in err:
e = e.strip()
if len(e) == 0:
if len(err) == 0:
continue
good = False
for exc in exclude:
if exc in e:
good = True
break
if exclude != None:
for exc in exclude:
if (exc != None) and (re.match(exc, err) != None):
fail = False
break
if fail:
log_print("Error detected: " + err)
return False
return good, err
return True
# stderr threads
errthr_objs = []
@ -123,22 +116,15 @@ errthr_failed = False
def errthr_get_failed():
return errthr_failed
def thr_check_stderr(p : sp.Popen, name: str, exclude):
def thr_check_stderr(p : sp.Popen, exclude):
# print("thread start!")
global errthr_failed
sel = select.poll()
sel.register(p.stderr, select.POLLIN)
local_failed = False
while(not errthr_sigstop):
if (not local_failed):
status, err = check_stderr(p, sel, exclude=exclude)
if not status:
errthr_failed = True
local_failed = True
log_print("Error detected in \"" + name + "\":")
for e in err:
log_print(" \"" + e + "\"")
log_print("")
time.sleep(random.uniform(0.001, 0.1))
if not scan_stderr(p, exclude=exclude):
errthr_failed = True
# print("running!")
time.sleep(0.5 + random.uniform(-0.1, 0.1))
# print("thread exit!")
def errthr_start():
global errthr_sigstop
@ -146,18 +132,18 @@ def errthr_start():
errthr_sigstop = False
errthr_failed = False
for thr in errthr_objs:
thr.daemon = True
thr.start()
def errthr_create(cp, name, exclude = None):
def errthr_create(cp, exclude = None):
global errthr_objs
for i in range(len(cp)):
errthr_objs.append(Thread(target = thr_check_stderr, args=(cp[i], name[i], exclude)))
for p in cp:
errthr_objs.append(Thread(target = thr_check_stderr, args=(p, exclude)))
def errthr_stop():
global errthr_objs
global errthr_sigstop
errthr_sigstop = True
# print("waiting!")
for thr in errthr_objs:
thr.join()
errthr_objs.clear()

View File

@ -1,340 +0,0 @@
import time
import subprocess as sp
import os
import libpar as par
import libtc as tc
import libmechspec as mechspec
class NetExpResult:
    """Container for the outputs of one network experiment run."""

    def __init__(self):
        self.sample = None      # raw sample-file text fetched from the master
        self.parser = None      # khat_parser over `sample`
        self.pmc_parser = None  # PMC results (parser object or [raw, processed])
class NetExpConf:
    """All knobs for one network experiment: server, clients, master, PMC."""

    def __init__(self):
        self.root_dir = ""
        self.enable_client_only = False

        # memory load generator (off by default)
        self.enable_memgen = False
        self.memgen_affinity = ""
        self.memgen_iteration = -1
        self.memgen_size = 512 * 1024 * 1024
        self.memgen_tgtdom = 1

        # server
        self.srv_affinity = ""
        self.srv_mechspec = None
        self.srv_port = 0

        # clients (clt_qps == 0 means "find max QPS")
        self.clt_qps = 0
        self.clt_mechspecs = []
        self.clt_affinity = "1"
        self.clt_wrkld = 0
        self.clt_wrkarg0 = "fixed:0"
        self.clt_wrkarg1 = "fixed:0"
        self.clt_pkt_loss_lat = 1000
        self.clt_rage_quit_lat = 1000
        self.clt_port = 0
        self.clt_pkt_pad = 0
        self.clt_pkt_depth = 1
        self.clt_ia = "exponential"

        # master (measurement) node
        self.mst_mechspec = None
        self.mst_affinity = "2"
        self.mst_qps = 100
        self.mst_port = 0
        self.mst_pkt_loss_lat = 1000
        self.mst_pkt_loss_max = 1000
        self.mst_duration = 10
        self.mst_warmup = 5
        self.mst_ia = "exponential"

        # PMC counters
        self.enable_pmc = False
        self.pmc_counters = []
        self.pmc_mode = 0 # 0 = sampling
        self.pmc_sampling_rate = 8192
        self.pmc_counting_interval = 0.1

    def __build_fqdn_arr(self, ns):
        # Collect the fqdn of every non-None mechspec.
        return [n.fqdn for n in ns if n != None]

    def get_pmc_str(self):
        """Return the counters as a comma-separated string ("" when empty)."""
        # idiom fix: str.join instead of manual concatenation + slicing
        return ",".join(self.pmc_counters)

    def calc_client_qps(self):
        """Per-client QPS: total minus the master's share, split evenly."""
        return 0 if self.clt_qps == 0 else (int)((self.clt_qps - self.mst_qps) / len(self.clt_mechspecs))

    def finalize_mechspecs(self):
        """Derive the fqdn lists from the configured mechspecs; call after setup."""
        self.clt_fqdns = self.__build_fqdn_arr(self.clt_mechspecs)
        self.srv_fqdns = self.__build_fqdn_arr([self.srv_mechspec])
        self.mst_fqdns = self.__build_fqdn_arr([self.mst_mechspec])
__SAMPLE_FN = "sample.txt.tmp"
__PMC_FN = "pmc.txt.tmp"
def __keep_result(conf : NetExpConf):
    """Fetch and parse the results of a finished run.

    Copies the master's sample file (and, when PMC is enabled, the server's
    PMC dump) into the local output directory, parses them into a
    NetExpResult, and deletes the local temporaries.  Returns the NetExpResult.
    """
    result = NetExpResult()
    # Pull the latency samples from the master host.
    target_scp_fn = tc.get_odir() + "/" + __SAMPLE_FN
    scpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.mst_mechspec.fqdn + ":" + conf.root_dir + "/" + __SAMPLE_FN + " " + target_scp_fn
    tc.log_print(scpcmd)
    sp.check_call(scpcmd, shell=True)
    result.parser = par.khat_parser()
    with open(target_scp_fn, "r") as f:
        result.sample = f.read()
        result.parser.parse(result.sample)
    rmcmd = "rm " + target_scp_fn
    tc.log_print(rmcmd)
    sp.check_call(rmcmd, shell=True)
    if conf.enable_pmc:
        # Pull the PMC output from the server host.
        target_pmc_fn = tc.get_odir() + "/" + __PMC_FN
        pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + " " + target_pmc_fn
        tc.log_print(pmcscpcmd)
        sp.check_call(pmcscpcmd, shell=True)
        if conf.pmc_mode == 0:
            # Sampling mode: post-process the raw dump with pmcstat -R on the
            # server, then fetch the processed ".proc" file as well.
            pmcproccmd = "sudo pmcstat -R " + conf.root_dir + "/" + __PMC_FN + " -m " + conf.root_dir + "/" + __PMC_FN + ".proc"
            tc.log_print(pmcproccmd)
            tc.remote_exec(conf.srv_fqdns, pmcproccmd)
            pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + ".proc" + " " + target_pmc_fn + ".proc"
            tc.log_print(pmcscpcmd)
            sp.check_call(pmcscpcmd, shell=True)
        if conf.pmc_mode != 0:
            # Counting mode: a single parseable text file.
            with open(target_pmc_fn, "r") as f:
                result.pmc_parser = par.pmc_parser(f.read())
        else:
            # Sampling mode: keep [raw binary dump, processed text] as a pair.
            with open(target_pmc_fn, "rb") as f:
                with open(target_pmc_fn + ".proc", "r") as g:
                    result.pmc_parser = [f.read(), g.read()]
            rmcmd = "rm " + target_pmc_fn + ".proc"
            tc.log_print(rmcmd)
            sp.check_call(rmcmd, shell=True)
        rmcmd = "rm " + target_pmc_fn
        tc.log_print(rmcmd)
        sp.check_call(rmcmd, shell=True)
    return result
def stop_all(conf : NetExpConf):
    """Force-kill every benchmark process on clients, master and (unless
    running client-only) the server, plus the server's pmcstat if enabled."""
    kill_cmd = "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen"
    tc.log_print("Stopping clients...")
    tc.remote_exec(conf.clt_fqdns, kill_cmd, check=False)
    tc.log_print("Stopping master...")
    tc.remote_exec(conf.mst_fqdns, kill_cmd, check=False)
    if conf.enable_client_only:
        return
    tc.log_print("Stopping server...")
    tc.remote_exec(conf.srv_fqdns, kill_cmd, check=False)
    if conf.enable_pmc:
        tc.log_print("Stopping server PMC...")
        tc.remote_exec(conf.srv_fqdns, "sudo killall -9 pmcstat", check=False)
def __run_setup_cmd(conf : NetExpConf, cmd : str, desc : str):
    """Run a setup shell command on every host (server, clients, master)
    in parallel and report per-host success/failure on stdout.

    Fix: the accumulator was named `all`, shadowing the builtin all();
    renamed to `hosts`.
    """
    hosts = []
    hosts.extend(conf.srv_fqdns)
    hosts.extend(conf.clt_fqdns)
    hosts.extend(conf.mst_fqdns)
    # (host, process) pairs so failures can be attributed to a host.
    ssrv : list[tuple[str, sp.Popen]] = []
    for s in hosts:
        tc.log_print(f"Running \'{desc}\' on {s}...")
        ssrv.append((s, tc.remote_exec([s], cmd, blocking=False, check=False)[0]))
    for host, proc in ssrv:
        _, stderr = proc.communicate()
        if proc.returncode != 0:
            print(f"{host} \'{desc}\' failed. stderr:\n{stderr.decode()}\n")
        else:
            print(f"{host} \'{desc}\' succeeded")
def setup(conf : NetExpConf, bench : bool = False, dpdk : bool = False):
    """Provision all experiment hosts.

    dpdk=True  -> build and install libtopo and the numam-dpdk fork.
    bench=True -> rsync this repository to every host and build the
                  khat/cat/rat/memloadgen binaries.

    Fixes: the parameters were annotated `bench : False` / `dpdk : False`,
    i.e. the *value* False used as a type annotation -- now `bool` with a
    backward-compatible default.  The local `dir` shadowed the builtin
    dir(); renamed to `src_dir`.
    """
    libtopo_path = "/libtopo"
    dpdk_path = "/dpdk"
    bench_path = "/numam.d"
    if dpdk:
        # libtopo must be installed before DPDK is built.
        setup_cmd = f'''sudo rm -rf {libtopo_path}; sudo rm -rf /usr/local/include/libtopo;
sudo rm -rf /usr/local/lib/libtopo;
sudo mkdir -p {libtopo_path};
sudo chmod 777 {libtopo_path};
cd {libtopo_path};
git clone https://git.quacker.org/d/libtopo;
cd libtopo;
mkdir build;
cd build;
cmake ../;
sudo make install'''
        __run_setup_cmd(conf, setup_cmd, "dpdk - libtopo")
        setup_cmd = f'''sudo pkg install -y meson pkgconf py39-pyelftools;
sudo rm -rf {dpdk_path}
sudo mkdir -p {dpdk_path};
sudo chmod 777 {dpdk_path};
cd {dpdk_path};
git clone https://git.quacker.org/d/numam-dpdk;
cd numam-dpdk;
git checkout migration;
CC=gcc CXX=g++ meson -Denable_kmods=true build;
cd build;
sudo ninja install'''
        __run_setup_cmd(conf, setup_cmd, "dpdk - dpdk")
    if bench:
        setup_cmd = f'''sudo rm -rf {bench_path};
sudo mkdir -p {bench_path};
sudo chmod 777 {bench_path}'''
        __run_setup_cmd(conf, setup_cmd, "bench - remove")
        hosts = []
        hosts.extend(conf.srv_fqdns)
        hosts.extend(conf.clt_fqdns)
        hosts.extend(conf.mst_fqdns)
        # Ship the repository root (parent of this script's directory).
        src_dir = f"{os.path.dirname(__file__)}/../"
        for host in hosts:
            print("Syncing files to " + host + "...")
            rsync_cmd = f"rsync -az --no-perms --rsync-path=\"sudo rsync\" --omit-dir-times -e \"ssh -p77\" {src_dir} {tc.get_ssh_user()}@{host}:{bench_path}/"
            sp.check_call(rsync_cmd, shell=True)
        setup_cmd = f'''cd {bench_path};
sudo rm -rf build;
mkdir build;
cd build;
cmake ../;
make -j8 khat cat rat memloadgen'''
        __run_setup_cmd(conf, setup_cmd, "bench - compile")
def run(conf : NetExpConf):
    """Run one experiment: launch PMC/server/memloadgen/clients/master,
    monitor stderr for failures, and retry the whole run until the master
    exits cleanly.  Returns the parsed NetExpResult via __keep_result().

    Fixes: removed a stray no-op statement (`tc.remote_exec` referenced but
    never called before starting the master); renamed the local `sp` to
    `smst` so it no longer shadows the module-level `subprocess as sp` alias.
    """
    stop_all(conf)
    while True:
        server_cmd = "sudo "
        if conf.enable_pmc:
            if conf.pmc_mode != 0:
                # Counting mode: periodic counter snapshots.
                pmc_cmd = "sudo pmcstat -C -w " + str(conf.pmc_counting_interval) + " -s " + conf.get_pmc_str() + " -o " + conf.root_dir + "/" + __PMC_FN
            else:
                # Sampling mode: raw sample dump, post-processed later.
                pmc_cmd = "sudo pmcstat -n " + str(conf.pmc_sampling_rate) + " -S " + conf.get_pmc_str() + " -O " + conf.root_dir + "/" + __PMC_FN
            tc.log_print("Starting server PMC...")
            tc.log_print(pmc_cmd)
            spmc = tc.remote_exec(conf.srv_fqdns, pmc_cmd, blocking=False)
        server_cmd += conf.root_dir + "/khat --log-level lib.eal:err -- -A " + conf.srv_affinity + \
            " -H " + conf.srv_mechspec.netspec + " -p " + str(conf.srv_port)
        if int(conf.clt_pkt_pad) > 1518:
            server_cmd += " -J "  # payload exceeds standard MTU: enable jumbo frames
        if conf.enable_client_only:
            ssrv = None
            tc.log_print(server_cmd)
        else:
            # start server
            tc.log_print("Starting server...")
            tc.log_print(server_cmd)
            ssrv = tc.remote_exec(conf.srv_fqdns, server_cmd, blocking=False)
            if conf.enable_memgen:
                memgen_cmd = "sudo " + conf.root_dir + "/memloadgen -b " + str(conf.memgen_size) + " -s " + conf.memgen_affinity + \
                    " -i " + str(conf.memgen_iteration) + " -d " + str(conf.memgen_tgtdom)
                tc.log_print("Starting memloadgen...")
                tc.log_print(memgen_cmd)
                smem = tc.remote_exec(conf.srv_fqdns, memgen_cmd, blocking=False)
        # start clients
        tc.log_print("Starting clients...")
        sclt = []
        sclt_name = []
        for i in range(len(conf.clt_fqdns)):
            # NOTE(review): -w is passed three times (workload id + two
            # workload args); presumably rat consumes repeated -w flags
            # positionally -- verify against rat's argument parser.
            client_cmd = "sudo " + conf.root_dir + "/rat --log-level lib.eal:err -- -S -A " + conf.clt_affinity + \
                " -i " + conf.clt_ia + \
                " -q " + str(conf.calc_client_qps()) + \
                " -H " + conf.clt_mechspecs[i].netspec + \
                " -s " + conf.srv_mechspec.netspec + \
                " -r " + str(conf.clt_rage_quit_lat) + \
                " -l " + str(conf.clt_pkt_loss_lat) + \
                " -w " + str(conf.clt_wrkld) + \
                " -w " + str(conf.clt_wrkarg0) + \
                " -w " + str(conf.clt_wrkarg1) + \
                " -P " + str(conf.clt_pkt_pad) + \
                " -D " + str(conf.clt_pkt_depth) + \
                " -p " + str(conf.clt_port)
            if int(conf.clt_pkt_pad) > 1518:
                client_cmd += " -J "
            tc.log_print(client_cmd)
            sclt.append(tc.remote_exec([conf.clt_fqdns[i]], client_cmd, blocking=False)[0])
            sclt_name.append(conf.clt_fqdns[i])
        # give server/clients time to come up before the master probes them
        time.sleep(5)
        # start master
        tc.log_print("Starting master...")
        master_cmd = "sudo " + conf.root_dir + "/cat --log-level lib.eal:err -- " + \
            " -s " + conf.srv_mechspec.netspec + \
            " -o " + conf.root_dir + "/" + __SAMPLE_FN + \
            " -t " + str(conf.mst_duration) + \
            " -T " + str(conf.mst_warmup) + \
            " -i " + conf.mst_ia + \
            " -q " + str(conf.mst_qps) + \
            " -l " + str(conf.mst_pkt_loss_lat) + \
            " -L " + str(conf.mst_pkt_loss_max) + \
            " -A " + conf.mst_affinity + \
            " -H " + conf.mst_mechspec.netspec + \
            " -p " + str(conf.mst_port)
        for clt in conf.clt_mechspecs:
            master_cmd += " -S " + clt.netspec
        tc.log_print(master_cmd)
        smst = tc.remote_exec(conf.mst_fqdns, master_cmd, blocking=False)
        p = smst[0]
        # launch stderr monitoring threads; these lines are benign noise
        exclude = ["Pseudo-terminal", "ice_", "i40e_"]
        tc.errthr_create([p], conf.mst_fqdns, exclude)
        if not conf.enable_client_only:
            tc.errthr_create(ssrv, conf.srv_fqdns, exclude)
        tc.errthr_create(sclt, sclt_name, exclude)
        if conf.enable_memgen:
            tc.errthr_create(smem, ["memloadgen"], exclude)
        if conf.enable_pmc:
            tc.errthr_create(spmc, ["pmcstat"], exclude)
        tc.errthr_start()
        success = False
        cur = 0
        while True:
            # either failed or timeout
            # we use failure detection to save time for long durations
            if tc.errthr_get_failed() or cur >= (conf.mst_warmup + conf.mst_duration) * 3:
                break
            if p.poll() != None:
                success = True
                break
            time.sleep(1)
            cur = cur + 1
        stop_all(conf)
        tc.errthr_stop()
        tc.log_print("Cooling down...")
        time.sleep(5)
        if success:
            return __keep_result(conf)

225
scripts/run.py Executable file
View File

@ -0,0 +1,225 @@
import subprocess as sp
import time
import select
import os
import datetime
import pwd
import sys
import getopt
import numpy as np
import re
import libpar as par
import libtc as tc
# Load-ramping knobs -- NOTE(review): only used by the commented-out ramp
# loop in main(); the active code always runs at max rate.
step_inc_pct = 100
init_step = 20000 #
start_step = 10000
term_qps = 85000000000
term_pct = 1
inc_pct = 50
server_port = 23444
# paths
test_dir = "/numam.d/build"
file_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(file_dir,"..")
sample_filename = "sample.txt"
# One experiment per entry; each is a hex core mask for the server.
affinity = [
    "0x4", # core 2
    "0x1000" # core 12
]
# Default hosts/MACs; overridable via a hostfile (-h).
master = ["skylake3.rcs.uwaterloo.ca"]
master_mac = ["3c:15:fb:c9:f3:4b"]
server = ["skylake2.rcs.uwaterloo.ca"]
server_mac = ["3c:15:fb:c9:f3:36"]
clients = []
client_mac = []
rage_quit = 1000 #1s
warmup = 5
duration = 10
cooldown = 0
cacheline = 0
SSH_PARAM = "-o StrictHostKeyChecking=no -p77"
SSH_USER = "oscar"
hostfile = None
lockstat = False
client_only = False
def stop_all():
    """Force-stop the benchmark binaries on every remote host group."""
    targets = [("clients", clients, "rat")]
    if not client_only:
        targets.append(("server", server, "khat"))
    targets.append(("master", master, "cat"))
    for desc, hosts, binary in targets:
        tc.log_print("Stopping " + desc + "...")
        tc.remote_exec(hosts, "sudo killall -9 " + binary, check=False)
def get_client_str(clt):
    """Render a host list as repeated ' -a <host> ' CLI fragments,
    prefixed with a single space (matches the original formatting)."""
    return " " + "".join(" -a " + host + " " for host in clt)
def run_exp(sc, ld):
    """Run one khat/cat experiment at server affinity `sc`, retrying until
    the master (cat) exits cleanly.

    NOTE(review): `ld` (load) is currently unused -- the commented-out ramp
    loop in main() passed it; confirm before removing.  The local `sp` also
    shadows the module-level `subprocess as sp` alias.
    """
    while True:
        if client_only:
            ssrv = None
        else:
            # start server
            tc.log_print("Starting server...")
            server_cmd = "sudo " + test_dir + "/khat -- -A " + sc
            tc.log_print(server_cmd)
            ssrv = tc.remote_exec(server, server_cmd, blocking=False)
        # start clients
        # tc.log_print("Starting clients...")
        # client_cmd = tc.get_cpuset_core(client_threads) + " " + test_dir + "/pingpong/build/dismember -A"
        # tc.log_print(client_cmd)
        # sclt = tc.remote_exec(ssh_clients, client_cmd, blocking=False)
        time.sleep(3)
        # start master
        tc.log_print("Starting master...")
        master_cmd = "sudo " + test_dir + "/cat -- " + \
            " -s " + server_mac[0] + \
            " -o " + test_dir + "/" + sample_filename + \
            " -t " + str(duration) + \
            " -T " + str(warmup) + \
            " -i fixed:0.01" + \
            " -r " + str(rage_quit) + \
            " -A 0x4"
        tc.log_print(master_cmd)
        sp = tc.remote_exec(master, master_cmd, blocking=False)
        p = sp[0]
        # launch stderr monitoring thread; EAL chatter is expected noise
        # NOTE(review): when client_only is set ssrv is None here -- presumably
        # errthr_create tolerates None; verify in libtc.
        tc.errthr_create(sp, exclude=[".*EAL.*"])
        tc.errthr_create(ssrv, exclude=[".*EAL.*"])
        tc.errthr_start()
        success = False
        cur = 0
        while True:
            # either failed or timeout
            # we use failure detection to save time for long durations
            if tc.errthr_get_failed() or cur >= int(warmup + duration) + 5 :
                break
            if p.poll() != None:
                success = True
                break
            time.sleep(1)
            cur = cur + 1
        stop_all()
        tc.errthr_stop()
        print("Cooling down...")
        time.sleep(cooldown)
        if success:
            return
def keep_results():
    """Copy the master's sample file into the output directory and log the
    total request count (one request per line in the sample)."""
    scpcmd = "scp -P77 oscar@" + master[0] + ":" + test_dir + "/" + sample_filename + " " + tc.get_odir() + "/sample.txt"
    tc.log_print(scpcmd)
    sp.check_call(scpcmd, shell=True)
    with open(tc.get_odir() + "/sample.txt", 'r') as f:
        tc.log_print("Total requests: " + str(len(f.readlines())))
    return
def main():
    """Entry point: parse flags, optionally remap hostnames from a hostfile,
    then run one max-rate experiment per server-affinity setting.

    Flags: -h <hostfile>, -s (stop remote processes and exit),
    -c (client-only).  NOTE(review): 'l', 'd', 'p' are accepted by getopt
    but never handled; `opt in ('-h')` is a substring test on the string
    "-h" (works for these single-char flags, but fragile).
    """
    global hostfile
    global server
    global master
    global clients
    global client_only
    tc.set_ssh_param(SSH_PARAM)
    tc.set_ssh_user(SSH_USER)
    options = getopt.getopt(sys.argv[1:], 'h:sldcp')[0]
    for opt, arg in options:
        if opt in ('-h'):
            hostfile = arg
        elif opt in ('-s'):
            stop_all()
            return
        elif opt in ('-c'):
            client_only=True
    tc.init("~/results.d/numam/")
    tc.log_print("Configuration:\n" + \
        "Hostfile: " + ("None" if hostfile == None else hostfile) + "\n" \
        "Client only: " + str(client_only) + "\n")
    if hostfile != None:
        # Remap the default hostnames through the hostfile.
        hosts = tc.parse_hostfile(hostfile)
        server = tc.process_hostnames(server, hosts)
        clients = tc.process_hostnames(clients, hosts)
        master = tc.process_hostnames(master, hosts)
    stop_all()
    for i in range(0, len(affinity)):
        eaff = affinity[i]
        # step_mul = 100
        # last_load = 0
        # cur_load = start_step
        tc.begin(eaff)
        tc.log_print("============ Affinity: " + str(eaff) + " Load: MAX" + " ============")
        run_exp(eaff, 0)
        keep_results()
        stop_all()
        # while True:
        #     tc.log_print("============ Sched: " + str(ename) + " Flag: " + format(esched, '#04x') + " Load: " + str(cur_load) + " ============")
        #     output, sout, serr = run_exp(esched, cur_load, lockstat)
        #     qps = keep_results(output, sout, serr)
        #     pct = int((qps - last_load) / init_step * 100)
        #     tc.log_print("last_load: " + str(last_load) + " this_load: " + str(qps) + " inc_pct: " + str(pct) + "%")
        #     if cur_load > term_qps:
        #         tc.log_print("qps more than " + str(term_qps) + "%. Done.")
        #         break
        #     if pct <= term_pct:
        #         tc.log_print("inc_pct less than TERM_PCT " + str(term_pct) + "%. Done.")
        #         break
        #     if pct <= inc_pct:
        #         step_mul += step_inc_pct
        #         tc.log_print("inc_pct less than INC_PCT " + str(inc_pct) + "%. Increasing step multiplier to " + str(step_mul) + "%")
        #     last_load = qps
        #     cur_load += int(init_step * step_mul / 100)
        # tc.log_print("")
        tc.end()
    stop_all()

main()

View File

@ -1,112 +0,0 @@
#!/usr/bin/env python3.6
import numpy as np
import sys
import re
import os
import json
import getopt
import math
import concurrent.futures as CF
# CSV column spec: (header label, DatObj attribute name, format spec).
columns = [
    ("Req per second", "rps", ".2f"),
    ("Bytes per second", "bps", ".2f"),
    ("Average Latency", "lat_avg", ".2f"),
    ("50th Latency", "lat_50", ".0f"),
    ("95th Latency", "lat_95", ".0f"),
    ("99th Latency", "lat_99", ".0f"),
    ("Latency stddev", "lat_std", ".2f")
]
TIME = 30       # run duration (s) used to derive throughput
REQ_SZ = 4096   # bytes per request used to derive bytes/s
class DatObj:
    """Summary statistics for one latency sample set.

    raw    -- list of per-request latencies
    time   -- run duration in seconds (for rps/bps)
    req_sz -- bytes per request (for bps)

    Fix: an empty sample list used to crash np.average/np.percentile;
    latency stats are now reported as 0.0 in that case (rps/bps are
    naturally 0).
    """
    def __init__(self, raw : list, time : int, req_sz : int):
        self.raw = raw
        self.rps = len(raw) / time
        self.bps = self.rps * req_sz
        if len(raw) > 0:
            self.lat_avg = np.average(self.raw)
            self.lat_99 = np.percentile(self.raw, 99)
            self.lat_95 = np.percentile(self.raw, 95)
            self.lat_50 = np.percentile(self.raw, 50)
            self.lat_std = np.std(self.raw)
        else:
            self.lat_avg = self.lat_99 = self.lat_95 = self.lat_50 = self.lat_std = 0.0
def parse_file(lines : list, time : int, req_sz : int) -> DatObj :
    """Parse latency lines (one integer per line) into a DatObj.

    Fix: lines coming from readlines() keep their trailing newline, so a
    blank line ("\\n") passed the old `len(line) > 0` check and crashed
    int().  Strip each line first and skip those empty after stripping.
    """
    raw = []
    for line in lines:
        stripped = line.strip()
        if stripped:
            raw.append(int(stripped))
    return DatObj(raw, time, req_sz)
def output_col():
    """Build the CSV header: each metric contributes base, NUMA and
    percent-change columns."""
    cells = ["Benchmark"]
    for label, _, _ in columns:
        cells.extend([label, label + " (NUMA)", "% change"])
    return ",".join(cells)
def get_attr_or_none(obj, attr):
    """Return getattr(obj, attr), or None when obj itself is None."""
    return None if obj is None else getattr(obj, attr)
def output_objs(name: str, obj : DatObj, obj_numa : DatObj):
    """Format one CSV data row: for every metric emit the baseline value,
    the NUMA value, and the percent change; missing values become N/A."""
    cells = [name]
    for _, attr, fmt in columns:
        base = get_attr_or_none(obj, attr)
        numa = get_attr_or_none(obj_numa, attr)
        cells.append(format(base, fmt) if base != None else "N/A")
        cells.append(format(numa, fmt) if numa != None else "N/A")
        if base == None or numa == None:
            cells.append("N/A")
        else:
            cells.append(format((numa - base) / base * 100, ".2f") + "%")
    return ",".join(cells)
def process_file(f : str, obj_map):
    # Parse a single result file; store its DatObj under the file's basename.
    with open(f, "r") as fp:
        lines = fp.readlines()
    bench_name = os.path.basename(f)
    obj_map[bench_name] = parse_file(lines, TIME, REQ_SZ)
    print("Processed file " + f + ". Benchmark name: " + bench_name)
def process_dir(path : str, obj_map):
    # Process every regular file in `path` (non-recursive).
    files = [os.path.abspath(os.path.join(path, x)) for x in os.listdir(path)]
    for f in files:
        # NOTE(review): substring match skips any path containing ".sh",
        # not just *.sh files -- presumably meant endswith(".sh"); confirm.
        if (".sh" in f):
            continue
        if (os.path.isfile(f)):
            process_file(f, obj_map)
def main():
    """Entry point: -d <dir> names the directory of result files; writes a
    comparison of each benchmark against its "_numa" twin to results.csv."""
    datdir = None
    options = getopt.getopt(sys.argv[1:], 'd:')[0]
    for opt, arg in options:
        if opt in ('-d'):
            datdir = arg
    if datdir == None:
        raise Exception("Must specify -d parameter")
    obj_map = dict()
    process_dir(datdir, obj_map)
    with open("results.csv", "w") as f:
        f.write(output_col())
        f.write("\n")
        for bench in obj_map:
            # "_numa" entries are emitted alongside their baseline, not alone.
            if bench.endswith("_numa"):
                continue
            f.write(output_objs(bench, obj_map[bench], obj_map.get(bench+"_numa")))
            f.write("\n")

if __name__ == "__main__":
    main()

View File

@ -1,19 +0,0 @@
# Benchmark sweep for the POSIX backend (birb_posix) against /dev/nvd0.
# Each workload runs twice: local cores (-a 0x555555) and the far NUMA
# domain (-a 0x555555000000); the latter output is suffixed _numa.
# rand_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read_numa
# rand_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write_numa
# mono_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read_numa
# mono_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write_numa
# mixed
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read_numa

View File

@ -1,19 +0,0 @@
# Benchmark sweep for the SPDK bdev backend (birb -k bdev) against Nvme0n1.
# Local-domain run (-m 0xAAAAAA / -a 0x555555) vs far NUMA domain
# (-m 0xAAAAAA000000 / -a 0x555555000000, output suffixed _numa).
# rand_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev
# rand_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev
# mono_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev
# mono_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev
# mixed
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev

View File

@ -1,19 +0,0 @@
# Benchmark sweep for the SPDK bdev backend (birb -k bdev) against Nvme0n1.
# NOTE(review): byte-for-byte identical to the other bdev sweep script in
# this change -- presumably one copy is redundant; confirm before keeping both.
# rand_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev
# rand_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev
# mono_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev
# mono_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev
# mixed
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev

View File

@ -1,797 +0,0 @@
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/types.h>
#include <x86/_stdint.h>
#include <getopt.h>
#include <pthread.h>
#include <pthread_np.h>
#include <threads.h>
#include <unistd.h>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <chrono>
#include <list>
#include <set>
#include "rte_lcore.h"
#include "spdk/cpuset.h"
#include "spdk/stdinc.h"
#include "spdk/thread.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "gen.hh"
#include "ntr.h"
#include "defs.hh"
#include "nm.hh"
#include "storage/io_gen.hh"
#include "storage/drivers/driver.hh"
#include "storage/drivers/bdev.hh"
#include "storage/drivers/nvme.hh"
/* Current timestamp in nanoseconds (high_resolution_clock epoch). */
static inline uint64_t get_cur_ts_nano()
{
    auto now = std::chrono::high_resolution_clock::now();
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch());
    return ns.count();
}
/*
 * We'll use this struct to gather housekeeping hello_context to pass between
 * our events and callbacks.
 */
static constexpr unsigned long MAX_SPEC_LEN = 32;
static constexpr unsigned long MAX_DEV_NAME_LEN = 32;
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;

/* Parsed command-line options (see usage()); populated by parse_arg(). */
struct options_t {
    // args
    int verbosity = NTR_LEVEL_DEFAULT;
    int num_threads = 1;                    /* popcount of cpumask */
    unsigned long cpumask = 1;              /* -a: one worker per set bit */
    char pattern_spec[MAX_SPEC_LEN] = "R,100";  /* -P: mode,read-pct */
    char ia_spec[MAX_SPEC_LEN] = "fixed";   /* -I: inter-arrival distribution */
    unsigned int time = 5;                  /* -t: measured seconds */
    unsigned int warmup = 2;                /* -w: warm-up seconds */
    unsigned int queue_depth = 1;           /* -Q: in-flight IOs per worker */
    char dev_name[MAX_DEV_NAME_LEN] = "Malloc0";
    char driver_name[MAX_DEV_NAME_LEN] = "bdev";    /* "bdev" or "nvme" */
    unsigned int read_pct = 0;              /* derived from pattern_spec */
    io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
    char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt";
    unsigned long req_size = 4096;          /* -b: bytes per IO */
    unsigned long rps = 0;                  /* -q: target IOs/sec (0 = max) */
};

/* State the main thread's message callbacks update (init/stop counters). */
struct main_thread_cb_vars {
    uint32_t worker_thread_init_cnt;
    uint32_t worker_thread_stop_cnt;
};

/* State a worker thread's message/IO callbacks update. */
struct worker_thread_cb_vars {
    uint32_t worker_start;      /* set to 1 by cb_notify_worker_start */
    uint32_t worker_stop;       /* set to 1 by cb_notify_worker_stop */
    struct thread_context * ctx;
    std::list<struct io_request *> * free_ios;  /* reusable request pool */
};

/* Per-thread pointer to that thread's cb-vars struct (main or worker). */
static __thread void * cb_vars;
static struct options_t options;

/* One completed IO: start/end timestamps in nanoseconds. */
struct io_record {
    uint64_t start_ts;
    uint64_t end_ts;
};

/* One in-flight IO: issue timestamp, opcode, and its two buffers. */
struct io_request {
    uint64_t start_ts;
    io_generator_opcode op;
    char * user_buf;    /* destination for simulated user-space copy */
    char * dma_buf;     /* DMA-able buffer handed to the driver */
};

/* Per-worker context shared between the main thread and one worker. */
struct thread_context {
    unsigned int tid;
    unsigned int coreid;
    unsigned int sockid;    /* NUMA domain for allocations */
    pthread_t sys_thread;
    struct spdk_thread * main_thread;   /* target for notify messages */
    birb_driver * driver;
    unsigned long start_region_offset;  /* this worker's device slice */
    unsigned long start_region_length;
    /* modified by worker threads */
    struct spdk_thread * sp_thread;
    std::list<io_record *> *io_records;
    uint64_t overhead_avg;  /* per-loop-iteration latency stats */
    uint32_t overhead_cnt;
    uint64_t overhead_max;
    uint64_t overhead_min;
};
/* Log the effective option values at INFO level (one ntr call). */
static void dump_options()
{
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
        "        dev name: %s\n"
        "        driver name: %s\n"
        "        worker threads: 0x%lx\n"
        "        number of threads: %d\n"
        "        IO request size: %lu\n"
        "        IO requests per second: %lu\n"
        "        IO pattern: %s\n"
        "        IO queue depth: %d\n"
        "        IO addressing mode: %d\n"
        "        read percent: %u\n"
        "        inter-arrival dist: %s\n"
        "        run time: %d\n"
        "        warmup time: %d\n"
        "        output file: %s\n",
        options.dev_name,
        options.driver_name,
        options.cpumask,
        options.num_threads,
        options.req_size,
        options.rps,
        options.pattern_spec,
        options.queue_depth,
        options.addr_mode,
        options.read_pct,
        options.ia_spec,
        options.time,
        options.warmup,
        options.output_file
    );
}
/* Print the command-line help text to stdout. */
static void usage()
{
    fprintf(stdout,
        "    -V(VV): verbose mode\n"
        "    -D: dev name\n"
        "    -k: driver to use (default bdev)\n"
        "    -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
        "    -b: IO request size\n"
        "    -q: IO requests per second\n"
        "    -P: IO request pattern\n"
        "    -Q: IO request queue depth\n"
        "    -I: inter-arrival time distribution\n"
        "    -t: total run time\n"
        "    -w: warm up time\n"
        "    -o: latency response output file\n");
}
/*
 * Handle one getopt option character into the global `options`.
 * Returns 0 on success, EINVAL on an unknown option or invalid value.
 *
 * Fix: strncpy() does not NUL-terminate when the source fills the buffer
 * (CERT STR32-C); every string copy now reserves the final byte and
 * terminates explicitly.
 *
 * NOTE(review): cases 'D'/'k' read the `arg` parameter while the rest read
 * the `optarg` global -- presumably the caller passes optarg so both see
 * the same string; verify at the call site.
 */
static int parse_arg(int c, char *arg)
{
    switch (c) {
    case 'V':
        ntr_set_level(NTR_DEP_USER1,
            ntr_get_level(NTR_DEP_USER1) + 1);
        break;
    case 'D':
        strncpy(options.dev_name, arg, MAX_DEV_NAME_LEN - 1);
        options.dev_name[MAX_DEV_NAME_LEN - 1] = '\0';
        break;
    case 'k':
        strncpy(options.driver_name, arg, MAX_DEV_NAME_LEN - 1);
        options.driver_name[MAX_DEV_NAME_LEN - 1] = '\0';
        break;
    case 'a':
        options.cpumask = strtoull(optarg, nullptr, 16);
        options.num_threads = cmask_get_num_cpus(
            options.cpumask);
        if (options.num_threads == 0) {
            fprintf(stderr,
                "must run at least one thread\n");
            return EINVAL;
        }
        break;
    case 'b':
        options.req_size = strtoull(
            optarg, nullptr, 10);
        break;
    case 'q':
        options.rps = strtoull(
            optarg, nullptr, 10);
        break;
    case 'Q':
        options.queue_depth = strtoull(
            optarg, nullptr, 10);
        break;
    case 'P':
        strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN - 1);
        options.pattern_spec[MAX_SPEC_LEN - 1] = '\0';
        break;
    case 'I':
        strncpy(options.ia_spec, optarg, MAX_SPEC_LEN - 1);
        options.ia_spec[MAX_SPEC_LEN - 1] = '\0';
        break;
    case 't':
        options.time = strtoull(
            optarg, nullptr, 10);
        break;
    case 'w':
        options.warmup = strtoull(
            optarg, nullptr, 10);
        break;
    case 'o':
        strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN - 1);
        options.output_file[MAX_OUTPUT_FILE_LEN - 1] = '\0';
        break;
    case 'h':
    default:
        return EINVAL;
    }
    return 0;
}
/*
 * Factory: construct the storage driver named by driver_name.
 * `context` is driver-specific -- here it is always the device name string.
 * Returns nullptr for an unknown driver name.
 */
static birb_driver *
birb_create_driver(const char * driver_name, void * context)
{
    if (strcmp(driver_name, "bdev") == 0) {
        return new birb_bdev_driver(reinterpret_cast<const char *>(context));
    } else if (strcmp(driver_name, "nvme") == 0) {
        return new birb_nvme_driver(reinterpret_cast<const char *>(context));
    } else {
        return nullptr;
    }
}
/*
 * Factory: construct the per-thread context matching the driver's type.
 * Returns nullptr for an unrecognized driver type.
 */
static birb_driver_thread_context *
birb_create_thread_context(birb_driver * driver)
{
    if (driver->get_type() == birb_driver::BIRB_DRV_BDEV) {
        return new birb_bdev_thread_context(dynamic_cast<birb_bdev_driver *>(driver));
    } else if (driver->get_type() == birb_driver::BIRB_DRV_NVME) {
        return new birb_nvme_thread_context(dynamic_cast<birb_nvme_driver *>(driver));
    } else {
        return nullptr;
    }
}
/* Destroy a driver created by birb_create_driver(). */
static void
birb_destroy_driver(birb_driver * drv)
{
    delete drv;
}

/* Destroy a context created by birb_create_thread_context(). */
static void
birb_destroy_thread_context(birb_driver_thread_context * ctx)
{
    delete ctx;
}
/*
 * Callback function for io completion.
 */
static void
worker_io_complete(bool success, void *cb_arg)
{
    /* cb_vars is this worker's thread-local worker_thread_cb_vars. */
    auto vars = (struct worker_thread_cb_vars *)cb_vars;
    auto req = (struct io_request *)cb_arg;
    uint64_t end_ts = get_cur_ts_nano();
    if (!success) {
        // XXX: print warning for errors for now
        ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d <worker_io_complete>: io request failed\n", vars->ctx->tid);
    } else {
        /* Record the request's latency window. */
        auto rec = new struct io_record;
        rec->start_ts = req->start_ts;
        rec->end_ts = end_ts;
        vars->ctx->io_records->push_back(rec);
        if (req->op == IOGEN_READ) {
            /* Simulate delivery of the read data to a user buffer. */
            memcpy(req->user_buf, req->dma_buf, options.req_size);
        }
        ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_io_complete>: completed io request type %d\n", vars->ctx->tid, req->op);
    }
    /* Return the request object to the pool for reuse. */
    vars->free_ios->push_back(req);
}
/*
 * SPDK inter-thread messages.  Each cb_notify_* runs on the *receiving*
 * thread, so `cb_vars` inside it is the receiver's thread-local struct;
 * `arg` is always the worker's thread_context.
 */

/* Worker -> main: this worker finished initialization. */
static void
cb_notify_main_init(void * arg)
{
    auto * ctx = (struct thread_context *)arg;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_init: from thread %d to main.\n", ctx->tid);
    auto * vars = (struct main_thread_cb_vars *) cb_vars;
    vars->worker_thread_init_cnt++;
}

/* Worker -> main: this worker stopped cleanly. */
static void
cb_notify_main_stop(void * arg)
{
    auto * ctx = (struct thread_context *)arg;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_stop: from thread %d to main.\n", ctx->tid);
    auto * vars = (struct main_thread_cb_vars *) cb_vars;
    vars->worker_thread_stop_cnt++;
}

/* Main -> worker: begin issuing IOs. */
static void
cb_notify_worker_start(void * arg)
{
    auto * ctx = (struct thread_context *)arg;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_start: from main to thread %d.\n", ctx->tid);
    auto * vars = (struct worker_thread_cb_vars *) cb_vars;
    vars->worker_start = 1;
}

/* Main -> worker: drain in-flight IOs and exit the main loop. */
static void
cb_notify_worker_stop(void * arg)
{
    auto * ctx = (struct thread_context *)arg;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_stop: from main to thread %d.\n", ctx->tid);
    auto * vars = (struct worker_thread_cb_vars *) cb_vars;
    vars->worker_stop = 1;
}

/* Zero the main thread's callback counters. */
static void
main_thread_cb_vars_init(struct main_thread_cb_vars * vars)
{
    vars->worker_thread_init_cnt = 0;
    vars->worker_thread_stop_cnt = 0;
}

/* Initialize a worker's callback state (flags clear, pool attached). */
static void
worker_thread_cb_vars_init(struct worker_thread_cb_vars * vars, struct thread_context * ctx,
    std::list<struct io_request *> * free_ios)
{
    vars->worker_start = 0;
    vars->worker_stop = 0;
    vars->ctx = ctx;
    vars->free_ios = free_ios;
}
/*
 * Worker thread entry point (one per set bit in options.cpumask).
 *
 * Creates an SPDK thread pinned to ctx->coreid, allocates queue_depth IO
 * request objects, signals the main thread, waits for the start message,
 * then issues IOs paced by the inter-arrival generator until told to stop.
 * All resources are released via the goto-cleanup path.
 *
 * Fixes:
 *  - driver_thread_ctx was read uninitialized in the cleanup path when an
 *    early `goto cleanup` fired (e.g. cpuset allocation failure) -- UB.
 *    Now initialized to nullptr.
 *  - spdk_thread_exit()/spdk_thread_is_exited() were called even when
 *    ctx->sp_thread was nullptr (spdk_thread_create failure); the exit
 *    sequence is now guarded.  NOTE(review): this assumes the spawner
 *    zero-initializes ctx->sp_thread -- confirm in main().
 */
static void *
worker_thread_main(void * arg)
{
    int rc = 0;
    constexpr static unsigned int SPDK_THREAD_NAME_SZ = 16;
    struct worker_thread_cb_vars vars;
    auto *ctx = (struct thread_context *)arg;
    birb_driver_thread_context * driver_thread_ctx = nullptr;
    std::list<struct io_request *> free_ios;
    char spdk_thread_name[SPDK_THREAD_NAME_SZ];
    struct spdk_cpuset * cpuset;
    Generator * ia_gen = nullptr;
    io_generator * io_gen = nullptr;
    struct io_generator_ctx io_ctx;
    uint64_t next_ts;
    uint64_t a_offset;
    uint64_t last_loop_ts = 0;

    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);

    ctx->overhead_avg = 0;
    ctx->overhead_cnt = 0;
    ctx->overhead_max = 0;
    ctx->overhead_min = UINT64_MAX;

    // create spdk thread
    cpuset = spdk_cpuset_alloc();
    if (cpuset == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc cpuset\n");
        rc = ENOMEM;
        goto cleanup;
    }
    spdk_cpuset_zero(cpuset);
    spdk_cpuset_set_cpu(cpuset, ctx->coreid, true);
    snprintf(spdk_thread_name, SPDK_THREAD_NAME_SZ, "birb_worker_%u", ctx->tid);
    ctx->sp_thread = spdk_thread_create(spdk_thread_name, cpuset);
    if (ctx->sp_thread == nullptr) {
        rc = ENOMEM;
        goto cleanup;
    }
    spdk_set_thread(ctx->sp_thread);

    // create thread context
    driver_thread_ctx = birb_create_thread_context(ctx->driver);
    if (driver_thread_ctx == nullptr || driver_thread_ctx->get_status() != birb_driver::BIRB_SUCCESS) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not create thread context!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }

    // create io request objects (one DMA + one user buffer each)
    for (unsigned int i = 0; i < options.queue_depth; i++) {
        auto dma_buf = (char *)spdk_dma_zmalloc_socket(options.req_size, ctx->driver->get_align(), NULL, ctx->sockid);
        auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
        if (dma_buf == nullptr || user_buf == nullptr) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
            rc = ENOMEM;
            goto cleanup;
        }
        auto io_req = new struct io_request;
        io_req->dma_buf = dma_buf;
        io_req->user_buf = user_buf;
        free_ios.push_back(io_req);
    }

    // init thread local states
    worker_thread_cb_vars_init(&vars, ctx, &free_ios);
    cb_vars = &vars;

    ia_gen = createGenerator(options.ia_spec);
    if (ia_gen == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    // each worker generates an equal share of the target rate
    ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));

    io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode);
    if (io_gen == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }

    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
    if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_init, ctx)) != 0) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
        goto cleanup;
    }

    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
    while (vars.worker_start != 1) {
        spdk_thread_poll(spdk_get_thread(), 0, 0);
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);

    /* random delay 0-100 us */
    usleep(nm_get_uptime_ns() % 100);

    next_ts = get_cur_ts_nano();
    while (true) {
        /* track per-iteration loop overhead (min/avg/max) */
        uint64_t cur_loop_ts = get_cur_ts_nano();
        if (last_loop_ts > 0) {
            uint64_t overhead = cur_loop_ts - last_loop_ts;
            if (ctx->overhead_max < overhead) {
                ctx->overhead_max = overhead;
            }
            if (ctx->overhead_min > overhead) {
                ctx->overhead_min = overhead;
            }
            ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead;
            ctx->overhead_cnt++;
            ctx->overhead_avg /= ctx->overhead_cnt;
        }
        last_loop_ts = cur_loop_ts;

        spdk_thread_poll(spdk_get_thread(), 0, 0);
        driver_thread_ctx->poll();

        if (vars.worker_stop != 0) {
            /* stop requested: exit once every IO has completed */
            if (free_ios.size() >= options.queue_depth) {
                break;
            }
        } else {
            if (!free_ios.empty()) {
                auto io_req = free_ios.front();
                uint64_t cur_ts = get_cur_ts_nano();
                if (cur_ts >= next_ts) {
                    io_gen->issue(&io_ctx, io_req->dma_buf);
                    /* offsets are relative to this worker's region */
                    a_offset = io_ctx.offset + ctx->start_region_offset;
                    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
                    io_req->start_ts = cur_ts;
                    io_req->op = io_ctx.op;
                    if(io_ctx.op == IOGEN_READ) {
                        rc = driver_thread_ctx->read(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req);
                    } else {
                        rc = driver_thread_ctx->write(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req);
                    }
                    if (rc != 0) {
                        ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...", ctx->tid, rc);
                    } else {
                        /* pace the next issue by the inter-arrival sample */
                        free_ios.pop_front();
                        next_ts = next_ts + ia_gen->generate() * S2NS;
                    }
                }
            }
        }
    }

cleanup:
    while (!free_ios.empty()) {
        auto req = free_ios.front();
        free_ios.pop_front();
        spdk_dma_free(req->dma_buf);
        nm_free(ctx->sockid, req->user_buf);
    }
    if (ia_gen != nullptr) {
        delete ia_gen;
    }
    if (io_gen != nullptr) {
        delete io_gen;
    }
    if (cpuset != nullptr) {
        spdk_cpuset_free(cpuset);
    }
    if (driver_thread_ctx != nullptr) {
        birb_destroy_thread_context(driver_thread_ctx);
    }
    if (rc == 0) {
        if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_stop, ctx)) != 0) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
        }
    }
    if (ctx->sp_thread != nullptr) {
        spdk_thread_exit(ctx->sp_thread);
        while (!spdk_thread_is_exited(ctx->sp_thread)) {
            spdk_thread_poll(ctx->sp_thread, 0, 0);
        }
        spdk_set_thread(nullptr);
        spdk_thread_destroy(ctx->sp_thread);
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
    if (rc != 0) {
        spdk_app_stop(rc);
    }
    return nullptr;
}
static void
parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode)
{
    /*
     * Parse an IO pattern spec of the form "<mode>,<read_pct>", e.g. "M,100".
     * mode "M" selects monotonically increasing addresses; anything else
     * selects uniform random addressing.
     */
    char * token = strtok(pattern, ",");
    /* BUG FIX: strtok() can return NULL on an empty spec; guard before use. */
    if (token != nullptr && strcmp(token, "M") == 0) {
        *addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
    } else {
        *addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
    }
    token = strtok(nullptr, ",");
    if (token != nullptr) {
        /* leave *read_pct untouched when the percentage field is missing */
        *read_pct = strtoull(token, nullptr, 10);
    }
}
static void
birb_main(void * arg1 UNUSED)
{
    /*
     * SPDK application entry point: creates the storage driver, spawns one
     * pinned worker thread per CPU in options.cpumask, runs the timed
     * benchmark, then dumps per-request latencies to the output file.
     */
    int rc = 0;
    std::list<struct thread_context *> worker_threads;
    std::ofstream output_file;
    struct main_thread_cb_vars vars;
    birb_driver * drv = nullptr;
    unsigned long record_cutoff_time = 0;
    unsigned long current_s = 0;
    unsigned int total_reqs = 0;
    unsigned int tid = 0;
    unsigned long per_thread_cap = 0;
    unsigned long measured_sec;
    int cur_core;
    /* initialize driver */
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name);
    drv = birb_create_driver(options.driver_name, options.dev_name);
    if (drv == nullptr || drv->get_status() != birb_driver::BIRB_SUCCESS) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create device driver.\n");
        rc = EINVAL;
        goto end;
    }
    /* each worker owns an equal, disjoint slice of the device */
    per_thread_cap = drv->get_capacity() / options.num_threads;
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB\n", drv->get_capacity(), drv->get_capacity() / 1024 / 1024);
    /* misc init */
    main_thread_cb_vars_init(&vars);
    cb_vars = &vars;
    parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode);
    dump_options();
    output_file.open(options.output_file, std::ofstream::out);
    if (!output_file) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
        rc = EINVAL;
        goto end;
    }
    /* spawn one worker per CPU in the mask, each pinned to its core */
    cur_core = cmask_get_next_cpu(&options.cpumask);
    while (cur_core != NEXT_CPU_NULL) {
        auto * ctx = new struct thread_context;
        if (ctx == NULL) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
            spdk_app_stop(ENOMEM);
            return;
        }
        /* BUG FIX: zero the struct only after the NULL check, not before */
        memset(ctx, 0, sizeof(struct thread_context));
        ctx->tid = tid++;
        ctx->driver = drv;
        ctx->main_thread = spdk_get_thread();
        ctx->sockid = rte_lcore_to_socket_id(cur_core);
        ctx->coreid = cur_core;
        ctx->io_records = new std::list<struct io_record *>();
        ctx->start_region_length = per_thread_cap;
        ctx->start_region_offset = per_thread_cap * ctx->tid;
        // create sys thread pinned to cur_core
        pthread_attr_t attr;
        cpuset_t scpuset;
        CPU_ZERO(&scpuset);
        CPU_SET(cur_core, &scpuset);
        pthread_attr_init(&attr);
        pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
        rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx);
        if (rc != 0) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
            rc = EINVAL;
            goto end;
        }
        worker_threads.push_back(ctx);
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
            ctx->start_region_offset,
            ctx->start_region_length);
        cur_core = cmask_get_next_cpu(&options.cpumask);
    }
    /* workers report init completion via messages to this spdk thread */
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n");
    while (vars.worker_thread_init_cnt < (uint32_t)options.num_threads) {
        spdk_thread_poll(spdk_get_thread(), 0, 0);
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
    for (struct thread_context * tctx : worker_threads) {
        rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_start, tctx);
        if (rc != 0) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
            goto end;
        }
    }
    /* main event loop: sleep one second at a time; records taken before the
     * warmup deadline are discarded later */
    while (current_s < options.time) {
        if (current_s >= options.warmup && record_cutoff_time == 0) {
            record_cutoff_time = get_cur_ts_nano();
        }
        usleep(1 * S2US);
        current_s++;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
    for (struct thread_context * tctx : worker_threads) {
        rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_stop, tctx);
        if (rc != 0) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
            goto end;
        }
    }
    while (vars.worker_thread_stop_cnt < (uint32_t)options.num_threads) {
        spdk_thread_poll(spdk_get_thread(), 0, 0);
    }
    // keep stats: dump post-warmup latencies and per-thread loop overhead
    for (struct thread_context * tctx : worker_threads) {
        uint64_t last_ts = 0;
        uint64_t processed = 0;
        for (struct io_record * r : *tctx->io_records) {
            if (r->start_ts >= record_cutoff_time) {
                if (r->end_ts > last_ts) {
                    last_ts = r->end_ts;
                }
                processed++;
                output_file << r->end_ts - r->start_ts << std::endl;
                total_reqs++;
            }
        }
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n",
            tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max);
    }
    /* BUG FIX: guard divide-by-zero when time == warmup */
    measured_sec = options.time > options.warmup ? options.time - options.warmup : 1;
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n",
        total_reqs, total_reqs * options.req_size / measured_sec);
end:
    if (drv != nullptr) {
        birb_destroy_driver(drv);
    }
    output_file.close();
    for (struct thread_context * tctx : worker_threads) {
        for (struct io_record * r : *tctx->io_records) {
            delete r;
        }
        delete tctx->io_records;
        delete tctx;
    }
    /* BUG FIX: previously exit(0) swallowed a non-zero rc and left an
     * unreachable spdk_app_stop(rc) behind. NOTE(review): the hard exit()
     * (skipping SPDK teardown) is kept as in the original — confirm intent. */
    exit(rc);
}
int
main(int argc, char **argv)
{
    /* Process entry point: parse arguments, then hand control to SPDK. */
    struct spdk_app_opts opts = {};
    int ret = 0;

    ntr_init();
    ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);

    /* Populate opts with SPDK defaults before parsing. */
    spdk_app_opts_init(&opts, sizeof(opts));
    opts.name = "birb";

    /*
     * Parse built-in SPDK command line parameters as well
     * as our custom one(s).
     */
    ret = spdk_app_parse_args(argc, argv, &opts, "VD:k:a:b:q:Q:P:I:t:w:o:", NULL, parse_arg,
        usage);
    if (ret != SPDK_APP_PARSE_ARGS_SUCCESS) {
        exit(ret);
    }

    nm_init(options.verbosity);

    /*
     * spdk_app_start() initializes the SPDK framework, calls birb_main(),
     * and blocks until spdk_app_stop() is called (or returns a non-zero
     * code on initialization failure without running birb_main()).
     */
    ret = spdk_app_start(&opts, birb_main, NULL);
    if (ret) {
        SPDK_ERRLOG("ERROR starting application\n");
    }

    /* Gracefully close out all of the SPDK subsystems. */
    spdk_app_fini();
    return ret;
}

View File

@ -1,585 +0,0 @@
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/types.h>
#include <fcntl.h>
#include <getopt.h>
#include <pthread.h>
#include <pthread_np.h>
#include <threads.h>
#include <unistd.h>
#include <aio.h>
#include <getopt.h>
#include <sys/ioctl.h>
#include <sys/disk.h>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <chrono>
#include <list>
#include <set>
#include "gen.hh"
#include "ntr.h"
#include "defs.hh"
#include "nm.hh"
#include "storage/io_gen.hh"
static inline uint64_t get_cur_ts_nano()
{
    /* Current time since epoch in nanoseconds (high_resolution_clock). */
    auto now = std::chrono::high_resolution_clock::now();
    auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch());
    return elapsed.count();
}
/*
 * Global benchmark configuration, filled in by command-line parsing in
 * main() and read by every worker thread.
 */
static constexpr unsigned long MAX_SPEC_LEN = 32;          // max bytes for -P / -I specs
static constexpr unsigned long MAX_DEV_NAME_LEN = 32;      // max bytes for -D / -k names
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;  // max bytes for -o path
struct options_t {
    // args
    int verbosity = NTR_LEVEL_DEFAULT;                  // ntr log level passed to nm_init()
    int num_threads = 1;                                // derived from popcount of cpumask (-a)
    unsigned long cpumask = 1;                          // -a: hex CPU mask for worker threads
    char pattern_spec[MAX_SPEC_LEN] = "R,100";          // -P: "<mode>,<read_pct>"
    char ia_spec[MAX_SPEC_LEN] = "fixed";               // -I: inter-arrival distribution
    unsigned int time = 5;                              // -t: total run time (s)
    unsigned int warmup = 2;                            // -w: records before this are discarded (s)
    unsigned int queue_depth = 1;                       // -Q: in-flight IOs per thread
    char dev_name[MAX_DEV_NAME_LEN] = "Malloc0";        // -D: device/path to open
    char driver_name[MAX_DEV_NAME_LEN] = "bdev";        // -k: driver backend
    unsigned int read_pct = 0;                          // parsed from pattern_spec
    io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM; // parsed from pattern_spec
    char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt"; // -o: per-request latency output
    unsigned long req_size = 4096;                      // -b: IO size in bytes
    unsigned long rps = 0;                              // -q: aggregate requests per second
};
// Cross-thread coordination flags: workers spin on these, main publishes.
std::atomic<int> worker_thread_init_cnt(0); // number of workers that finished init
std::atomic<int> worker_thread_stop_cnt(0); // number of workers that fully stopped
std::atomic<int> worker_start(0);           // set to 1 by main to release workers
std::atomic<int> worker_stop(0);            // set to 1 by main to request shutdown
static struct options_t options;            // global run configuration (see options_t)
// One completed IO: start/end timestamps in nanoseconds (get_cur_ts_nano()).
struct io_record {
    uint64_t start_ts;
    uint64_t end_ts;
};
// One in-flight IO slot, recycled through the per-thread free list.
struct io_request {
    uint64_t start_ts;        // submission timestamp (ns)
    io_generator_opcode op;   // IOGEN_READ or IOGEN_WRITE
    char * user_buf;          // destination for read data (memcpy'd on completion)
    char * dma_buf;           // buffer handed to the kernel aio
    struct aiocb aio;         // POSIX aio control block for this slot
};
// Per-worker state, allocated and zeroed by the main thread.
struct thread_context {
    unsigned int tid;                  // dense worker index, 0..num_threads-1
    unsigned int coreid;               // CPU the worker is pinned to
    unsigned int sockid;               // NUMA node of coreid (for nm_malloc)
    pthread_t sys_thread;
    int disk_fd;                       // shared fd of the device under test
    unsigned long start_region_offset; // this worker's disjoint slice of the disk
    unsigned long start_region_length;
    /* modified by worker threads */
    std::list<io_record *> *io_records; // completed-IO log, read by main at the end
    // event-loop iteration overhead stats (ns), reported by main
    uint64_t overhead_avg;
    uint32_t overhead_cnt;
    uint64_t overhead_max;
    uint64_t overhead_min;
};
/* Log the effective run configuration at INFO level. */
static void dump_options()
{
    /* BUG FIX: queue_depth, time and warmup are unsigned; %d was a format
     * specifier mismatch (undefined behavior per the printf contract). */
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
        "        dev name: %s\n"
        "        driver name: %s\n"
        "        worker threads: 0x%lx\n"
        "        number of threads: %d\n"
        "        IO request size: %lu\n"
        "        IO requests per second: %lu\n"
        "        IO pattern: %s\n"
        "        IO queue depth: %u\n"
        "        IO addressing mode: %d\n"
        "        read percent: %u\n"
        "        inter-arrival dist: %s\n"
        "        run time: %u\n"
        "        warmup time: %u\n"
        "        output file: %s\n",
        options.dev_name,
        options.driver_name,
        options.cpumask,
        options.num_threads,
        options.req_size,
        options.rps,
        options.pattern_spec,
        options.queue_depth,
        options.addr_mode,
        options.read_pct,
        options.ia_spec,
        options.time,
        options.warmup,
        options.output_file
    );
}
/* Print the command-line help text to stdout. */
static void usage()
{
    /* The help text contains no conversion specifiers, so fputs() is
     * equivalent to the fprintf() it replaces. */
    fputs(" -V(VV): verbose mode\n"
          " -D: dev name\n"
          " -k: driver to use (default bdev)\n"
          " -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
          " -b: IO request size\n"
          " -q: IO requests per second\n"
          " -P: IO request pattern\n"
          " -Q: IO request queue depth\n"
          " -I: inter-arrival time distribution\n"
          " -t: total run time\n"
          " -w: warm up time\n"
          " -o: latency response output file\n",
        stdout);
}
/*
 * Worker thread body (POSIX aio backend). Pre-allocates queue_depth IO
 * slots, waits for the global start flag, then runs an open-loop load
 * generator: completions are reaped with aio_error/aio_return, and new IOs
 * are issued whenever a slot is free and the inter-arrival schedule is due.
 * Runs until worker_stop is set and all in-flight IOs have drained.
 * Returns nullptr; rc is only used for local error paths.
 */
static void *
worker_thread_main(void * arg)
{
    int rc = 0;
    auto *ctx = (struct thread_context *)arg;
    std::list<struct io_request *> free_ios;  // idle slots, ready to issue
    std::list<struct io_request *> prog_ios;  // slots with an aio in flight
    Generator * ia_gen = nullptr;             // inter-arrival time generator
    io_generator * io_gen = nullptr;          // offset/opcode generator
    struct io_generator_ctx io_ctx;
    uint64_t next_ts;                         // deadline for the next issue (ns)
    uint64_t a_offset;                        // absolute disk offset of the IO
    uint64_t last_loop_ts = 0;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);
    // create io request objects (one per queue-depth slot)
    for (unsigned int i = 0; i < options.queue_depth; i++) {
        auto buf = (char *)nm_malloc(ctx->sockid, options.req_size);
        auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
        if (buf == nullptr || user_buf == nullptr) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
            rc = ENOMEM;
            goto cleanup;
        }
        auto io_req = new struct io_request;
        io_req->dma_buf = buf;
        io_req->user_buf = user_buf;
        // constant aiocb fields; offset is filled in per-IO below
        io_req->aio.aio_fildes = ctx->disk_fd;
        io_req->aio.aio_nbytes = options.req_size;
        io_req->aio.aio_buf = buf;
        // completions are detected by polling aio_error(), not by signal
        io_req->aio.aio_sigevent.sigev_notify = SIGEV_NONE;
        io_req->aio.aio_reqprio = 0;
        free_ios.push_back(io_req);
    }
    // init thread local states
    ia_gen = createGenerator(options.ia_spec);
    if (ia_gen == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    // each thread serves an equal share of the aggregate request rate
    ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));
    io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode);
    if (io_gen == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
    worker_thread_init_cnt.fetch_add(1);
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
    // busy-wait on the global start flag (thread is pinned to its own core)
    while (worker_start.load() == 0) {}
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);
    /* random delay 0-100 us to de-synchronize workers */
    usleep(nm_get_uptime_ns() % 100);
    next_ts = get_cur_ts_nano();
    while (true) {
        uint64_t cur_ts = get_cur_ts_nano();
        // track per-iteration event-loop overhead (min/avg/max, in ns)
        if (last_loop_ts > 0) {
            uint64_t overhead = cur_ts - last_loop_ts;
            if (ctx->overhead_max < overhead) {
                ctx->overhead_max = overhead;
            }
            if (ctx->overhead_min > overhead) {
                ctx->overhead_min = overhead;
            }
            ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead;
            ctx->overhead_cnt++;
            ctx->overhead_avg /= ctx->overhead_cnt;
        }
        last_loop_ts = cur_ts;
        // process io completion: poll every in-flight aio
        auto itr = prog_ios.begin();
        while (itr != prog_ios.end()) {
            int err;
            struct io_request * ioreq = *itr;
            if ((err = aio_error(&ioreq->aio)) != EINPROGRESS) {
                if (err == 0) {
                    auto rec = new struct io_record;
                    rec->start_ts = ioreq->start_ts;
                    rec->end_ts = cur_ts;
                    ctx->io_records->push_back(rec);
                    // emulate delivering read data to the application
                    if (ioreq->op == IOGEN_READ) {
                        memcpy(ioreq->user_buf, ioreq->dma_buf, options.req_size);
                    }
                    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_io_complete>: completed io request type %d\n", ctx->tid, ioreq->op);
                } else {
                    ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: aio failed with %d...\n", ctx->tid, err);
                }
                // aio_return must be called exactly once per completed aio
                if (aio_return(&ioreq->aio) == -1) {
                    ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: aio_return failed with %d...\n", ctx->tid, errno);
                    exit(errno);
                }
                /* cleanup */
                itr = prog_ios.erase(itr);
                free_ios.push_back(ioreq);
            } else {
                ++itr;
            }
        }
        if (worker_stop.load() == 1) {
            // drain: exit only once every slot has completed
            if (free_ios.size() >= options.queue_depth) {
                break;
            }
        } else {
            if (!free_ios.empty()) {
                auto io_req = free_ios.front();
                cur_ts = get_cur_ts_nano();
                // open-loop pacing: issue when the scheduled deadline passes
                if (cur_ts >= next_ts) {
                    io_gen->issue(&io_ctx, io_req->dma_buf);
                    a_offset = io_ctx.offset + ctx->start_region_offset;
                    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
                    io_req->start_ts = cur_ts;
                    io_req->op = io_ctx.op;
                    io_req->aio.aio_offset = a_offset;
                    if (io_ctx.op == IOGEN_READ) {
                        rc = aio_read(&io_req->aio);
                    } else {
                        rc = aio_write(&io_req->aio);
                    }
                    if (rc != 0) {
                        // slot stays at the front of free_ios; retried next loop
                        ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...\n", ctx->tid, errno);
                    } else {
                        free_ios.pop_front();
                        prog_ios.push_back(io_req);
                        // schedule from the previous deadline, not from "now"
                        next_ts = next_ts + ia_gen->generate() * S2NS;
                    }
                }
            }
        }
    }
cleanup:
    // all IOs have drained, so only free_ios holds slots here
    while (!free_ios.empty()) {
        auto req = free_ios.front();
        free_ios.pop_front();
        nm_free(ctx->sockid, req->dma_buf);
        nm_free(ctx->sockid, req->user_buf);
    }
    if (ia_gen != nullptr) {
        delete ia_gen;
    }
    if (io_gen != nullptr) {
        delete io_gen;
    }
    worker_thread_stop_cnt.fetch_add(1);
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
    return nullptr;
}
static void
parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode)
{
    /*
     * Parse an IO pattern spec of the form "<mode>,<read_pct>", e.g. "M,100".
     * mode "M" selects monotonically increasing addresses; anything else
     * selects uniform random addressing.
     */
    char * token = strtok(pattern, ",");
    /* BUG FIX: strtok() can return NULL on an empty spec; guard before use. */
    if (token != nullptr && strcmp(token, "M") == 0) {
        *addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
    } else {
        *addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
    }
    token = strtok(nullptr, ",");
    if (token != nullptr) {
        /* leave *read_pct untouched when the percentage field is missing */
        *read_pct = strtoull(token, nullptr, 10);
    }
}
static void
birb_main()
{
    /*
     * Benchmark driver (POSIX aio backend): opens the raw device, spawns one
     * pinned worker per CPU in options.cpumask, runs for options.time
     * seconds, then writes post-warmup latencies to the output file.
     */
    int rc = 0;
    std::list<struct thread_context *> worker_threads;
    std::ofstream output_file;
    unsigned long record_cutoff_time = 0;
    unsigned long current_s = 0;
    unsigned int total_reqs = 0;
    unsigned int tid = 0;
    unsigned long per_thread_cap = 0;
    unsigned long measured_sec;
    int cur_core;
    int disk_fd;
    off_t disk_size;
    u_int disk_sec_size;
    /* initialize driver */
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name);
    disk_fd = open(options.dev_name, O_RDWR | O_DIRECT);
    if (disk_fd == -1) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open device - %d\n", errno);
        exit(errno);
    }
    rc = ioctl(disk_fd, DIOCGMEDIASIZE, &disk_size);
    if (rc == -1) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk size - %d\n", errno);
        exit(errno);
    }
    rc = ioctl(disk_fd, DIOCGSECTORSIZE, &disk_sec_size);
    if (rc == -1) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk sector size - %d\n", errno);
        exit(errno);
    }
    /* each worker owns an equal, disjoint slice of the device */
    per_thread_cap = disk_size / options.num_threads;
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB, sector %u bytes\n", disk_size, disk_size / 1024 / 1024, disk_sec_size);
    parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode);
    dump_options();
    output_file.open(options.output_file, std::ofstream::out);
    if (!output_file) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
        rc = EINVAL;
        goto end;
    }
    /* spawn one worker per CPU in the mask, each pinned to its core */
    cur_core = cmask_get_next_cpu(&options.cpumask);
    while (cur_core != NEXT_CPU_NULL) {
        auto * ctx = new struct thread_context;
        if (ctx == NULL) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
            exit(ENOMEM);
        }
        /* BUG FIX: zero the struct only after the NULL check, not before */
        memset(ctx, 0, sizeof(struct thread_context));
        ctx->tid = tid++;
        ctx->sockid = nm_get_node_from_core(cur_core);
        ctx->coreid = cur_core;
        ctx->io_records = new std::list<struct io_record *>();
        ctx->start_region_length = per_thread_cap;
        ctx->start_region_offset = per_thread_cap * ctx->tid;
        ctx->disk_fd = disk_fd;
        // create sys thread pinned to cur_core
        pthread_attr_t attr;
        cpuset_t scpuset;
        CPU_ZERO(&scpuset);
        CPU_SET(cur_core, &scpuset);
        pthread_attr_init(&attr);
        pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
        rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx);
        if (rc != 0) {
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
            rc = EINVAL;
            goto end;
        }
        worker_threads.push_back(ctx);
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
            ctx->start_region_offset,
            ctx->start_region_length);
        cur_core = cmask_get_next_cpu(&options.cpumask);
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n");
    // busy-wait until every worker has incremented the init counter
    while (worker_thread_init_cnt.load() < options.num_threads) {
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
    worker_start.store(1);
    /* main event loop: sleep one second at a time; records taken before the
     * warmup deadline are discarded later */
    while (current_s < options.time) {
        if (current_s >= options.warmup && record_cutoff_time == 0) {
            record_cutoff_time = get_cur_ts_nano();
        }
        usleep(1 * S2US);
        current_s++;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
    worker_stop.store(1);
    while (worker_thread_stop_cnt.load() < options.num_threads) {
    }
    // keep stats: dump post-warmup latencies and per-thread loop overhead
    for (struct thread_context * tctx : worker_threads) {
        uint64_t last_ts = 0;
        uint64_t processed = 0;
        for (struct io_record * r : *tctx->io_records) {
            if (r->start_ts >= record_cutoff_time) {
                if (r->end_ts > last_ts) {
                    last_ts = r->end_ts;
                }
                processed++;
                output_file << r->end_ts - r->start_ts << std::endl;
                total_reqs++;
            }
        }
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n",
            tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max);
    }
    /* BUG FIX: guard divide-by-zero when time == warmup */
    measured_sec = options.time > options.warmup ? options.time - options.warmup : 1;
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n",
        total_reqs, total_reqs * options.req_size / measured_sec);
end:
    if (disk_fd != -1) {
        close(disk_fd);
    }
    output_file.close();
    for (struct thread_context * tctx : worker_threads) {
        for (struct io_record * r : *tctx->io_records) {
            delete r;
        }
        delete tctx->io_records;
        delete tctx;
    }
    /* NOTE(review): errors reaching "end" are not propagated to the caller
     * (function returns void and main() returns 0) — confirm intent. */
    return;
}
int
main(int argc, char **argv)
{
    /* Parse command-line options into the global `options`, then run the
     * benchmark via birb_main(). */
    ntr_init();
    ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);
    int c;
    /* BUG FIX: added 'h' to the optstring so the existing "case 'h'" help
     * branch is actually reachable. */
    while ((c = getopt(argc, argv, "hVD:k:a:b:q:Q:P:I:t:w:o:")) != -1) {
        switch (c) {
        case 'V':
            /* each -V raises the log verbosity one level */
            ntr_set_level(NTR_DEP_USER1,
                ntr_get_level(NTR_DEP_USER1) + 1);
            break;
        case 'D':
            /* BUG FIX: copy at most LEN-1 bytes; strncpy() does not
             * NUL-terminate when the source fills the buffer. */
            strncpy(options.dev_name, optarg, MAX_DEV_NAME_LEN - 1);
            break;
        case 'k':
            strncpy(options.driver_name, optarg, MAX_DEV_NAME_LEN - 1);
            break;
        case 'a':
            options.cpumask = strtoull(optarg, nullptr, 16);
            options.num_threads = cmask_get_num_cpus(
                options.cpumask);
            if (options.num_threads == 0) {
                fprintf(stderr,
                    "must run at least one thread\n");
                return EINVAL;
            }
            break;
        case 'b':
            options.req_size = strtoull(
                optarg, nullptr, 10);
            break;
        case 'q':
            options.rps = strtoull(
                optarg, nullptr, 10);
            break;
        case 'Q':
            options.queue_depth = strtoull(
                optarg, nullptr, 10);
            break;
        case 'P':
            strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN - 1);
            break;
        case 'I':
            strncpy(options.ia_spec, optarg, MAX_SPEC_LEN - 1);
            break;
        case 't':
            options.time = strtoull(
                optarg, nullptr, 10);
            break;
        case 'w':
            options.warmup = strtoull(
                optarg, nullptr, 10);
            break;
        case 'o':
            strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN - 1);
            break;
        case 'h':
            usage();
            exit(0);
        default:
            usage();
            exit(EINVAL);
        }
    }
    nm_init(options.verbosity);
    birb_main();
    return 0;
}

View File

@ -1,95 +0,0 @@
#include <sys/endian.h>
#include "storage/drivers/bdev.hh"
#include "ntr.h"
#include "spdk/bdev.h"
#include "spdk/thread.h"
size_t
birb_bdev_driver::get_capacity()
{
return block_num * block_sz;
}
birb_driver::birb_driver_status
birb_bdev_driver::get_status()
{
    /* Result of construction: BIRB_SUCCESS iff the bdev was opened. */
    return status;
}
/*
 * SPDK bdev event callback registered at open time. No events (e.g. hot
 * remove, resize) are handled; everything is logged as unsupported.
 */
void
birb_bdev_driver::bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev * bdev UNUSED,
    void * event_ctx UNUSED)
{
    ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "bdev_event_cb: unsupported bdev event: type %d\n", type);
}
void
birb_bdev_driver::print_all_bdev()
{
    /* Log the name of every block device currently registered with SPDK. */
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_bdev_driver: all registered block devices: ");
    for (struct spdk_bdev * dev = spdk_bdev_first(); dev != NULL; dev = spdk_bdev_next(dev)) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "%s, ", spdk_bdev_get_name(dev));
    }
}
/*
 * Open the named bdev read-write and cache its geometry. On success status
 * becomes BIRB_SUCCESS; on failure the error is logged and status stays
 * BIRB_FAIL (callers must check get_status()).
 */
birb_bdev_driver::birb_bdev_driver(const char * dev_name) : bdev_desc(nullptr),
                                                            bdev(nullptr),
                                                            block_sz(0),
                                                            block_num(0),
                                                            status(BIRB_FAIL)
{
    int rc;
    rc = spdk_bdev_open_ext(dev_name, true, birb_bdev_driver::bdev_event_cb, NULL, &this->bdev_desc);
    if (rc != 0) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_bdev_driver: failed to open bdev: %d\n", rc);
        return;
    }
    /* A bdev pointer is valid while the bdev is opened. */
    this->bdev = spdk_bdev_desc_get_bdev(this->bdev_desc);
    this->block_sz = spdk_bdev_get_block_size(this->bdev);
    this->block_num = spdk_bdev_get_num_blocks(this->bdev);
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_bdev_driver: bdev block size %zu bytes, blocks count %zu\n", this->block_sz, this->block_num);
    this->status = BIRB_SUCCESS;
}
birb_bdev_driver::~birb_bdev_driver()
{
    /* Only close the descriptor if construction actually opened one. */
    if (status == BIRB_SUCCESS) {
        spdk_bdev_close(bdev_desc);
    }
}
birb_driver::birb_driver_type
birb_bdev_driver::get_type()
{
return BIRB_DRV_BDEV;
}
size_t
birb_bdev_driver::get_align()
{
return spdk_bdev_get_buf_align(this->bdev);
}
struct spdk_bdev *
birb_bdev_driver::get_bdev()
{
return this->bdev;
}
struct spdk_bdev_desc *
birb_bdev_driver::get_bdev_desc()
{
return this->bdev_desc;
}

View File

@ -1,72 +0,0 @@
#include <sys/endian.h>
#include "storage/drivers/bdev.hh"
#include "ntr.h"
#include "spdk/bdev.h"
#include "spdk/thread.h"
/*
 * Per-thread bdev context: acquires an I/O channel for the calling SPDK
 * thread. status is BIRB_SUCCESS only when the channel was obtained.
 */
birb_bdev_thread_context::birb_bdev_thread_context(birb_bdev_driver * driver) : io_channel(nullptr),
                                                                                status(birb_driver::BIRB_FAIL),
                                                                                driver(driver)
{
    struct spdk_bdev_desc * desc = driver->get_bdev_desc();
    // obtain io channel
    this->io_channel = spdk_bdev_get_io_channel(desc);
    if (this->io_channel == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_bdev_thread_context: could not create bdev I/O channel!\n");
        /* BUG FIX: previously fell through and set BIRB_SUCCESS even when the
         * channel could not be created. */
        return;
    }
    this->status = birb_driver::BIRB_SUCCESS;
}
birb_driver::birb_driver_status
birb_bdev_thread_context::get_status()
{
    /* BIRB_SUCCESS iff the I/O channel was acquired at construction. */
    return status;
}
birb_bdev_thread_context::~birb_bdev_thread_context()
{
    /* Release the per-thread I/O channel if one was acquired. */
    if (io_channel != nullptr) {
        spdk_put_io_channel(io_channel);
    }
}
/*
 * Completion trampoline: frees the SPDK bdev io, then forwards the result
 * to the user callback captured at submission time and releases the
 * heap-allocated context.
 */
void
birb_bdev_thread_context::io_callback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
    spdk_bdev_free_io(bdev_io);
    auto cbctx = reinterpret_cast<struct cb_context *>(cb_arg);
    cbctx->cb(success, cbctx->ctx);
    delete cbctx;
}
int
birb_bdev_thread_context::read(size_t offset, size_t size, char * buffer, callback callback, void * context)
{
auto ctx = new struct cb_context;
ctx->cb = callback;
ctx->ctx = context;
return spdk_bdev_read(driver->get_bdev_desc(), this->io_channel, buffer, offset, size, io_callback, reinterpret_cast<void*>(ctx));
}
int
birb_bdev_thread_context::write(size_t offset, size_t size, char * buffer, callback callback, void * context)
{
auto ctx = new struct cb_context;
ctx->cb = callback;
ctx->ctx = context;
return spdk_bdev_write(driver->get_bdev_desc(), this->io_channel, buffer, offset, size, io_callback, reinterpret_cast<void*>(ctx));
}
/*
 * No-op: bdev completions are delivered by the SPDK thread poller
 * (spdk_thread_poll in the caller), so there is nothing to drive here.
 */
void
birb_bdev_thread_context::poll()
{
    return;
}

View File

@ -1,135 +0,0 @@
#include <sys/endian.h>
#include "ntr.h"
#include "spdk/nvme.h"
#include "spdk/thread.h"
#include "storage/drivers/nvme.hh"
size_t
birb_nvme_driver::get_capacity()
{
return spdk_nvme_ns_get_size(this->ns);
}
birb_driver::birb_driver_status
birb_nvme_driver::get_status()
{
    /* BIRB_SUCCESS iff probe/attach found a usable namespace. */
    return status;
}
/*
 * Called by SPDK after a controller accepted by probe_cb is attached.
 * Selects the first active namespace and publishes the controller,
 * namespace, and valid flag through the attach_context.
 */
void
birb_nvme_driver::attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts UNUSED)
{
    /* BUG FIX: ns was read uninitialized when the controller had no active
     * namespaces, and ctx->valid was set regardless. */
    struct spdk_nvme_ns * ns = nullptr;
    auto ctx = reinterpret_cast<struct attach_context *>(cb_ctx);
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: attached to nvme at %s\n", trid->traddr);
    for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
         nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
        struct spdk_nvme_ns * cur = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
        if (cur == nullptr || !spdk_nvme_ns_is_active(cur)) {
            continue;
        }
        ns = cur;
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: namespace id: %d size: %zu LBA size: %u\n", spdk_nvme_ns_get_id(ns), spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns));
        /* XXX: use the first namespace */
        break;
    }
    if (ns == nullptr) {
        /* no active namespace: leave ctx->valid at 0 so the constructor fails */
        return;
    }
    *ctx->ns = ns;
    *ctx->ctrlr = ctrlr;
    ctx->valid = 1;
}
bool
birb_nvme_driver::probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr_opts *opts UNUSED)
{
printf("birb_nvme_driver: found nvme at %s\n", trid->traddr);
auto ctx = reinterpret_cast<struct attach_context *>(cb_ctx);
if (strcmp(trid->traddr, ctx->dev_name) == 0) {
return true;
}
return false;
}
/*
 * Probe the PCIe bus for the controller named by dev_name, attach to it,
 * and select its first active namespace. Zoned (ZNS) namespaces are
 * rejected. On any failure, status stays BIRB_FAIL.
 */
birb_nvme_driver::birb_nvme_driver(const char * dev_name) : status(BIRB_FAIL),
                                                            ctrlr(nullptr),
                                                            ns(nullptr),
                                                            opts()
{
    int rc;
    struct spdk_nvme_transport_id trid;
    struct attach_context ctx;
    ctx.ctrlr = &this->ctrlr;
    ctx.ns = &this->ns;
    ctx.dev_name = dev_name;
    ctx.valid = 0;
    spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
    snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    rc = spdk_nvme_probe(&trid, reinterpret_cast<void *>(&ctx), probe_cb, attach_cb, nullptr);
    if (rc != 0) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: failed to probe nvme device: %d\n", rc);
        goto end;
    }
    if (ctx.valid != 1) {
        rc = EINVAL;
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: could not find device: %s\n", dev_name);
        goto end;
    }
    if (spdk_nvme_ns_get_csi(this->ns) == SPDK_NVME_CSI_ZNS) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: zoned nvme namespace is unsupported\n");
        spdk_nvme_detach(this->ctrlr);
        /* BUG FIX: clear the pointer so the destructor does not detach the
         * same controller a second time. */
        this->ctrlr = nullptr;
        goto end;
    } else {
        /* cache default qpair options for per-thread qpair allocation */
        spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &this->opts, sizeof(this->opts));
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: io queue depth: %d io queue requests: %d\n", opts.io_queue_size, opts.io_queue_requests);
        this->status = BIRB_SUCCESS;
    }
end:
    return;
}
birb_nvme_driver::~birb_nvme_driver()
{
    /* Detach from the controller if we are still attached. */
    if (ctrlr != nullptr) {
        spdk_nvme_detach(ctrlr);
    }
}
birb_driver::birb_driver_type
birb_nvme_driver::get_type()
{
    /* Driver kind tag for this backend. */
    return BIRB_DRV_NVME;
}

size_t
birb_nvme_driver::get_align()
{
    /* Fixed 4 KiB DMA buffer alignment. */
    return 0x1000;
}

spdk_nvme_ctrlr *
birb_nvme_driver::get_ctrlr()
{
    return ctrlr;
}

spdk_nvme_ns *
birb_nvme_driver::get_ns()
{
    return ns;
}

spdk_nvme_io_qpair_opts *
birb_nvme_driver::get_io_qpair_opts()
{
    /* Default qpair options captured at attach time. */
    return &opts;
}

View File

@ -1,90 +0,0 @@
#include <sys/endian.h>
#include "storage/drivers/nvme.hh"
#include "ntr.h"
#include "spdk/bdev.h"
#include "spdk/nvme.h"
#include "spdk/nvme_spec.h"
#include "spdk/thread.h"
/*
 * Per-thread NVMe context: allocates a dedicated I/O queue pair on the
 * driver's controller. status is BIRB_SUCCESS only if allocation worked.
 */
birb_nvme_thread_context::birb_nvme_thread_context(birb_nvme_driver * driver) : status(birb_driver::BIRB_FAIL),
                                                                                driver(driver),
                                                                                qpair(nullptr)
{
    struct spdk_nvme_qpair * qp = spdk_nvme_ctrlr_alloc_io_qpair(driver->get_ctrlr(),
        driver->get_io_qpair_opts(), sizeof(struct spdk_nvme_io_qpair_opts));
    if (qp == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_thread_context: could not allocate qpairs.\n");
        return;
    }
    this->qpair = qp;
    this->status = birb_driver::BIRB_SUCCESS;
}
birb_driver::birb_driver_status
birb_nvme_thread_context::get_status()
{
    /* BIRB_SUCCESS iff the qpair was allocated at construction. */
    return status;
}
birb_nvme_thread_context::~birb_nvme_thread_context()
{
    /* Free the per-thread qpair if one was allocated. */
    if (qpair != nullptr) {
        spdk_nvme_ctrlr_free_io_qpair(qpair);
    }
}
/*
 * Completion trampoline: derives success from the NVMe completion status,
 * forwards it to the user callback captured at submission time, and frees
 * the heap-allocated context.
 */
void
birb_nvme_thread_context::io_callback(void *arg, const struct spdk_nvme_cpl *completion)
{
    auto cbctx = reinterpret_cast<struct cb_context *>(arg);
    bool ok = !spdk_nvme_cpl_is_error(completion);
    cbctx->cb(ok, cbctx->ctx);
    delete cbctx;
}
/* Convert a byte count to the number of LBAs it occupies (round up). */
uint32_t
birb_nvme_thread_context::size_to_lba(size_t size, int lba_size)
{
    /* BUG FIX: for size == 0, (size - 1) underflowed size_t and yielded a
     * huge LBA count. */
    if (size == 0) {
        return 0;
    }
    return (size - 1) / lba_size + 1;
}
/* Convert a byte address to its containing LBA index (round down). */
uint64_t
birb_nvme_thread_context::addr_to_lba(size_t addr, int lba_size)
{
    return addr / lba_size;
}
int
birb_nvme_thread_context::read(size_t offset, size_t size, char * buffer, callback callback, void * context)
{
auto ctx = new struct cb_context;
ctx->cb = callback;
ctx->ctx = context;
struct spdk_nvme_ns * ns = this->driver->get_ns();
int lba_size = spdk_nvme_ns_get_sector_size(ns);
return spdk_nvme_ns_cmd_read(ns, this->qpair, buffer, addr_to_lba(offset, lba_size), size_to_lba(size, lba_size), io_callback, reinterpret_cast<void*>(ctx), 0);
}
int
birb_nvme_thread_context::write(size_t offset, size_t size, char * buffer, callback callback, void * context)
{
auto ctx = new struct cb_context;
ctx->cb = callback;
ctx->ctx = context;
struct spdk_nvme_ns * ns = this->driver->get_ns();
int lba_size = spdk_nvme_ns_get_sector_size(ns);
return spdk_nvme_ns_cmd_write(ns, this->qpair, buffer, addr_to_lba(offset, lba_size), size_to_lba(size, lba_size), io_callback, reinterpret_cast<void*>(ctx), 0);
}
/*
 * Drain any ready completions on this thread's qpair (non-blocking;
 * 0 = no limit on the number of completions processed per call).
 */
void
birb_nvme_thread_context::poll()
{
    spdk_nvme_qpair_process_completions(this->qpair, 0);
}

View File

@ -1,57 +0,0 @@
#include <sys/endian.h>
#include <random>
#include "nm.hh"
#include "storage/io_gen.hh"
/*
 * IO workload generator: produces (offset, size, opcode) tuples.
 * `capacity` bounds generated offsets; `read_pct` (0-100) is the chance an
 * IO is a read; addr_mode selects monotonic vs uniform-random addressing.
 */
io_generator::io_generator(
    unsigned long req_size,
    unsigned long capacity,
    unsigned int read_pct,
    io_generator_address_mode addr_mode) : cur_offset(0),
                                           capacity(capacity),
                                           req_size(req_size),
                                           read_pct(read_pct),
                                           addr_mode(addr_mode),
                                           rng(rd()),
                                           dist(std::uniform_int_distribution<int>(0, 99)),
                                           addr_rng(addr_rd()),
                                           addr_dist(std::uniform_int_distribution<uint64_t>(0, capacity - 1))
{
    // NOTE(review): both engines are seeded twice — first from the random
    // devices in the init list, then re-seeded from uptime here.
    rng.seed(nm_get_uptime_ns());
    addr_rng.seed(nm_get_uptime_ns());
}
/*
 * Produce the next IO: fills ctx with offset/size/opcode and, for writes,
 * fills buf with a random byte pattern. Returns 0 on success.
 */
int io_generator::issue(struct io_generator_ctx *ctx, char * buf)
{
    ctx->size = req_size;
    // determine next IO offset
    if (addr_mode == IOGEN_ADDR_MONOTONIC_INCREASING) {
        // sequential with wrap-around at capacity
        if (cur_offset + req_size > capacity) {
            cur_offset = 0;
        }
        ctx->offset = cur_offset;
        cur_offset = cur_offset + req_size;
    } else {
        // uniform random, aligned down to a req_size boundary
        ctx->offset = (addr_dist(addr_rng) / req_size) * req_size;
        if (ctx->offset + req_size > capacity) {
            /* BUG FIX: guard the subtraction — when capacity < req_size the
             * offset is 0 here and "offset -= req_size" underflowed. */
            ctx->offset = ctx->offset >= req_size ? ctx->offset - req_size : 0;
        }
    }
    // determine next IO opcode (and fill the write payload)
    int op_rng = dist(rng);
    if (op_rng < (int)read_pct) {
        ctx->op = IOGEN_READ;
    } else {
        ctx->op = IOGEN_WRITE;
        int data = dist(rng);
        memset(buf, data, req_size);
    }
    return 0;
}

View File

@ -1,32 +0,0 @@
#include "nms.h"
#include <assert.h>
#include <stdio.h>
/*
 * Smoke test for the nms allocator: double-init, then a mix of 1G and
 * 511M allocations on domain 0, printing each returned address.
 */
int main(void)
{
	void *p;

	nms_init(1);
	/* second init must be harmless */
	nms_init(1);

	/* 1G */
	p = nms_malloc(0, 1024 * 1024 * 1024);
	assert(p != NULL);
	printf("1G: %p\n", p);

	/* two back-to-back 511M allocations */
	for (int i = 0; i < 2; i++) {
		p = nms_malloc(0, 511 * 1024 * 1024);
		assert(p != NULL);
		printf("511M: %p\n", p);
	}

	/* another 1G */
	p = nms_malloc(0, 1024 * 1024 * 1024);
	assert(p != NULL);
	printf("1G: %p\n", p);

	return 0;
}

View File

@ -1,239 +0,0 @@
#include <sys/endian.h>
#include <sys/select.h>
#include <sys/signal.h>
#include "gen.hh"
#include <array>
#include <atomic>
#include <cstdlib>
#include <cstring>
#include <list>
#include <iostream>
#include <fstream>
#include "ntr.h"
#include "nms.h"
#include <getopt.h>
#include <pthread.h>
#include <unistd.h>
#include <topo.h>
/* Print the option summary to stdout and flush it. */
static void
usage()
{
	static const char *help_text =
	    "Usage:\n"
	    "    -v: verbose mode\n"
	    "    -b: buffer size\n"
	    "    -q: bytes per second\n"
	    "    -d: destination domain index\n"
	    "    -s: worker threads cpu list\n"
	    "    -m: pull mode cpu list\n"
	    "    -S: enable shared buffer\n"
	    "    -t: time to run\n"
	    "    -T: transaction size\n"
	    "    -i: inter arrival time distribution\n"
	    "    -o: output file path\n"
	    "    -H: history size for pct adjustment\n"
	    "    -M: print this string when threads are ready to run\n";
	fputs(help_text, stdout);
	fflush(stdout);
}
/* Path for per-second throughput samples; overridden by the -o option. */
static char output_file[256] = "memloadgen_samples.txt";
/*
 * memloadgen entry point: parses options, spawns a memload_generator over
 * the requested cpus/domain, then once per second logs measured throughput
 * to stdout (ntr) and to the output file ("s,<t>,<bps>"). Until a rate
 * adjustment has been applied, stdin is polled for a percentage; on
 * receipt the target rate is rescaled to pct% of the recent average
 * ("p,<t>,<pct>" is logged) and no further adjustments are accepted.
 *
 * Fixes vs the previous revision: strncpy could leave option buffers
 * unterminated (now snprintf); -T 0 caused a divide-by-zero; the inner
 * throughput variable shadowed the `bps` option; read() input was not
 * explicitly NUL-terminated; the output file open was unchecked; typo in
 * the "Configuration" log line.
 */
int main(int argc, char * argv[])
{
	ntr_init();
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);

	size_t arr_sz = 64 * 1024 * 1024;	/* -b: buffer size in bytes */
	uint32_t time = -1;			/* -t: seconds; -1 wraps to UINT32_MAX ("forever") */
	uint64_t bps = 0;			/* -q: target bytes/second */
	uint64_t transaction_size = arr_sz;	/* -T: bytes per transaction */
	cpuset_t threads, modes;
	char magic[256] = {0};			/* -M: readiness banner */
	CPU_ZERO(&threads);
	CPU_ZERO(&modes);
	CPU_SET(0, &threads);
	char ia_dist[32] = "fixed";		/* -i: inter-arrival distribution */
	int history_sz = 5;			/* -H: samples kept for pct adjustment */
	std::list<uint64_t> history;
	int shared_buffer = 0;			/* -S */
	int rate_ctrl = 0;			/* set once an adjustment was applied */
	cpuset_t domain_mask;
	CPU_ZERO(&domain_mask);
	CPU_SET(0, &domain_mask);

	{
		int c;
		// parse arguments
		while ((c = getopt(argc, argv, "vhb:d:s:m:So:T:t:q:i:H:M:")) != -1) {
			switch (c) {
			case 'v':
				ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
				break;
			case 'h':
				usage();
				exit(0);
			case 'b':
				arr_sz = strtoull(optarg, nullptr, 10);
				break;
			case 'd':
				cpulist_to_cpuset(optarg, &domain_mask);
				break;
			case 's':
				cpulist_to_cpuset(optarg, &threads);
				break;
			case 'm':
				cpulist_to_cpuset(optarg, &modes);
				break;
			case 'S':
				shared_buffer = 1;
				break;
			case 'o':
				/* snprintf always NUL-terminates; strncpy did not */
				snprintf(output_file, sizeof(output_file), "%s", optarg);
				break;
			case 't':
				time = strtoul(optarg, nullptr, 10);
				break;
			case 'T':
				transaction_size = strtoul(optarg, nullptr, 10);
				break;
			case 'q':
				bps = (uint64_t)strtoull(optarg, nullptr, 10);
				break;
			case 'i':
				snprintf(ia_dist, sizeof(ia_dist), "%s", optarg);
				break;
			case 'H':
				history_sz = strtol(optarg, nullptr, 10);
				break;
			case 'M':
				snprintf(magic, sizeof(magic), "%s", optarg);
				break;
			default:
				usage();
				exit(0);
			}
		}
	}

	/* transaction_size divides bps (and throughput samples) below */
	if (transaction_size == 0) {
		fprintf(stderr, "transaction size (-T) must be non-zero!\n");
		exit(1);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configuration:\n"
	    "        buffer size: %ld\n"
	    "        num threads: %d\n"
	    "        target domain: %ld\n"
	    "        bytes per second: %lu\n"
	    "        interarrival distribution: %s\n"
	    "        shared buffer: %d\n"
	    "        transaction time: %lu\n"
	    "        runtime: %d\n"
	    "        history: %d\n"
	    "        magic: %s\n",
	    arr_sz, CPU_COUNT(&threads),
	    CPU_FFS(&domain_mask) - 1, bps,
	    ia_dist, shared_buffer,
	    transaction_size, time, history_sz, magic);

	// init topo
	if (topo_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) {
		fprintf(stderr, "libtopo init failed!\n");
		exit(1);
	}
	// init nms
	if (nms_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) {
		fprintf(stderr, "libnms init failed!\n");
		exit(1);
	}

	bool success = false;
	memload_generator::memload_generator_options opts;
	opts.buffer_size = arr_sz;
	opts.trans_per_second = bps / transaction_size;
	opts.shared_buffer = shared_buffer;
	opts.transaction_size = transaction_size;
	opts.verbose = ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT;
	snprintf(opts.ia_dist, sizeof(opts.ia_dist), "%s", ia_dist);

	std::ofstream ofile;
	ofile.open(output_file, std::ios::out | std::ios::trunc);
	if (!ofile.is_open()) {
		fprintf(stderr, "failed to open output file %s!\n", output_file);
		exit(1);
	}

	auto mgen = new memload_generator(&threads, &modes, &domain_mask, &opts, &success);

	/* announce readiness to a controlling script, if requested */
	if (strlen(magic) > 0) {
		fprintf(stdout, "%s\n", magic);
		fflush(stdout);
	}

	if (!mgen->start()) {
		fprintf(stderr, "failed to start memloadgen!\n");
		exit(1);
	}

	struct timeval stval;
	stval.tv_sec = 0;
	stval.tv_usec = 0;	/* zero timeout: poll stdin without blocking */
	char pct_line[64] = {0};
	uint64_t prev_ts = topo_uptime_ns();
	uint64_t prev_trans = mgen->get_transactions();
	uint32_t cur_time = 0;
	while (cur_time < time) {
		usleep(S2US);

		/* measured throughput over the last interval */
		uint64_t cur_ts = topo_uptime_ns();
		uint64_t trans = mgen->get_transactions();
		uint64_t cur_bps = (uint64_t)((double)((trans - prev_trans) * transaction_size) / ((double)(cur_ts - prev_ts) / (double)S2NS));
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "%ldB,%ldM\n", cur_bps, cur_bps / 1024 / 1024);
		ofile << "s," << cur_time << "," << cur_bps << std::endl;
		ofile.flush();
		prev_ts = cur_ts;
		prev_trans = trans;
		cur_time++;

		if (rate_ctrl == 0) {
			// keep history
			history.emplace_back(cur_bps);
			if ((int)history.size() > history_sz) {
				history.pop_front();
			}

			/* one-shot rate control: a percentage on stdin rescales
			 * the target to pct% of the recent average throughput */
			fd_set fdset;
			FD_ZERO(&fdset);
			FD_SET(STDIN_FILENO, &fdset);
			int ret = select(STDIN_FILENO + 1, &fdset, NULL, NULL, &stval);
			if (ret < 0) {
				if (errno != EINTR) {
					fprintf(stderr, "select() failed with %d\n", errno);
					exit(1);
				}
			} else if (ret > 0) {
				if (FD_ISSET(STDIN_FILENO, &fdset)) {
					ret = read(STDIN_FILENO, pct_line, sizeof(pct_line) - 1);
					if (ret < 0) {
						fprintf(stderr, "read() failed with %d\n", errno);
						exit(1);
					}
					pct_line[ret] = '\0';	/* read() does not terminate */
					unsigned int pct = strtoul(pct_line, NULL, 10);
					uint64_t sum = 0;
					size_t sz = history.size();	/* >= 1: a sample was pushed above */
					while (history.size() > 0) {
						sum += history.front();
						history.pop_front();
					}
					uint64_t newbps = ((sum / sz) * (double)pct / 100.0);
					mgen->set_transactions(newbps / transaction_size);
					ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "adjusted target bps to %u%% = %ldB ~= %ldM\n", pct, newbps, newbps / 1024 / 1024);
					ofile << "p," << cur_time << "," << pct << std::endl;
					ofile.flush();
					rate_ctrl = 1;
				}
			}
		}
	}

	mgen->stop();
	delete mgen;
	ofile.close();
	return 0;
}

View File

@ -1,237 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "nms.h"
#include <getopt.h>
#include <unistd.h>
#include <topo.h>
#include <immintrin.h>
#include <x86intrin.h>
#include <stdatomic.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <errno.h>
#include <stdint.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <pthread.h>
#include <pthread_np.h>
/* Probe buffer: 128 MiB of ints, allocated on the remote core's NUMA node. */
#define BUFFER_SIZE (128 * 1024 * 1024)
/* Element count of that buffer (BUFFER_SIZE is in bytes). */
#define BUFFER_CNT (BUFFER_SIZE / sizeof(int))
/* Handshake: local thread sets 1 to ask the remote thread to touch/flush
 * the target line; remote thread sets it back to 0 when done. */
static _Atomic int flush = 0;
/* Byte offset of the cache line under test within remote_buffer. */
static _Atomic uint64_t offset = 0;
static int * remote_buffer = NULL;
/* Per-iteration load latencies in TSC cycles; filled back-to-front. */
static uint64_t * latencies;
/* Remaining iterations (-t); also the 1-based write index into latencies. */
static int times = 100;
static int local_core = 0;	/* -l: core running the timed loads */
static int remote_core = 1;	/* -r: core that flushes/primes the line */
/* 0: remote thread flushes the line; 1: remote thread reads it into cache. */
static int cache_mode = 0;
static int verbose = 0;		/* -v (repeatable) */
/* nonzero (-R): probe a random offset each iteration instead of offset 0. */
static int random_access = 0;
/* TSC frequency from the machdep.tsc_freq sysctl; 0 if the query failed. */
static uint64_t tsc_freq = 0;
/*
 * Convert TSC cycles to nanoseconds using the sysctl-reported frequency.
 * If the frequency query failed (tsc_freq == 0), return 0 instead of
 * dividing by zero: the resulting inf would invoke undefined behavior on
 * conversion back to uint64_t.
 */
static inline uint64_t cyc2ns(uint64_t cyc)
{
	if (tsc_freq == 0) {
		return 0;
	}
	return (double)cyc / ((double)tsc_freq / 1000000000.0);
}
/*
 * Read the TSC with serialization on both sides: rdtscp does not execute
 * until prior instructions have completed, and the trailing lfence keeps
 * later instructions from starting before the counter read retires — so
 * a pair of read_time() calls brackets exactly the code between them.
 * The aux output (IA32_TSC_AUX) is read into `a` and discarded.
 */
static inline uint64_t read_time(void)
{
	uint64_t l;
	unsigned int a;
	l = __rdtscp(&a);
	_mm_lfence();
	return l;
}
/*
 * Measurement thread, pinned to local_core. Each iteration: optionally
 * pick a random offset, hand the line to the remote thread (flush = 1),
 * wait for it to finish, evict the line locally, then time a single
 * dependent load of the line and record the latency.
 *
 * Fixes: the parameter is now named ((void *) parameters without a name
 * are not valid C before C23), and `temp` is initialized so the final
 * return is defined even if times <= 0 on entry.
 */
static void *
local_thread(void *arg)
{
	int temp = 0;	/* sink for the timed load */
	int *addr;
	uint64_t start, end;

	(void)arg;
	printf("Local thread running...\n");
	while (times > 0) {
		if (random_access) {
			/* pick a random int-aligned offset into the buffer */
			offset = (rand() % BUFFER_CNT) * sizeof(int);
		}
		/* hand off to the remote thread; spin until it acknowledges */
		flush = 1;
		while (flush != 0) {
		}
		addr = (int *)((char *)remote_buffer + offset);
		if (verbose > 1) {
			printf("Local thread(%d): flushing %p.\n", local_core, addr);
		}
		/* evict locally so the timed load must fetch the line */
		_mm_clflush(addr);
		_mm_mfence();
		atomic_signal_fence(memory_order_seq_cst);
		start = read_time();
		temp = *addr;
		end = read_time();
		atomic_signal_fence(memory_order_seq_cst);
		if (verbose > 1) {
			printf("Local thread(%d): read %p.\n", local_core, addr);
		}
		latencies[times - 1] = end - start;
		times--;
	}
	/* return the sink so the compiler cannot drop the timed load */
	return (void *)(uintptr_t)temp;
}
/*
 * Helper thread, pinned to remote_core. Waits for flush == 1, then either
 * reads the target line into its own cache (cache_mode) or flushes it from
 * all caches (!cache_mode), and acknowledges with flush = 0. Runs forever;
 * the process exits with this thread still spinning.
 *
 * Fixes: the parameter is now named (unnamed (void *) parameters are not
 * valid C before C23), and `temp` is initialized — previously it was read
 * at the (unreachable) return without ever being set when cache_mode == 0.
 */
static void *
remote_thread(void *arg)
{
	int temp = 0;	/* sink for the priming read */
	int *addr;

	(void)arg;
	printf("Remote thread running...\n");
	while (1) {
		while (flush == 0) {
		}
		addr = (int *)((char *)remote_buffer + offset);
		if (cache_mode) {
			/* pull the line into this core's cache */
			temp = *addr;
			_mm_mfence();
		} else {
			/* make sure the line is cached nowhere */
			_mm_clflush(addr);
			_mm_mfence();
		}
		if (verbose > 1) {
			printf("Remote thread(%d): %p %s.\n", remote_core, addr, cache_mode ? "read into cache" : "flushed");
		}
		flush = 0;
	}
	/* unreachable; keeps the signature's return well-formed */
	return (void *)(uintptr_t)temp;
}
int main(int argc, char * argv[])
{
{
int c;
// parse arguments
while ((c = getopt(argc, argv, "l:r:t:vR")) != -1) {
switch (c) {
case 'l':
local_core = atoi(optarg);
break;
case 'r':
remote_core = atoi(optarg);
break;
case 't':
times = atoi(optarg);
break;
case 'R':
random_access = 1;
break;
case 'v':
verbose++;
break;
default:
exit(1);
}
}
}
srand(time(NULL));
// init topo
if (topo_init(1)) {
fprintf(stderr, "libtopo init failed!\n");
exit(1);
}
// init
if (nms_init(1)) {
fprintf(stderr, "libnms init failed!\n");
exit(1);
}
size_t sz = sizeof(tsc_freq);
int rc;
if ((rc = sysctlbyname("machdep.tsc_freq", &tsc_freq, &sz, NULL, 0)) < 0) {
fprintf(stderr,"failed to query tsc frequency via sysctl (%d)\n", errno);
} else {
fprintf(stdout,"system tsc frequency = %lu\n", tsc_freq);
}
latencies = malloc(sizeof(uint64_t) * times);
const int remote_numa = topo_core_to_numa(remote_core);
const int local_numa = topo_core_to_numa(local_core);
const int total = times;
remote_buffer = nms_malloc(remote_numa, BUFFER_SIZE);
// fill with random values
for (int i = 0; i < BUFFER_SIZE; i++) {
remote_buffer[i] = rand();
}
pthread_attr_t lattr, rattr;
pthread_t lthread, rthread;
cpuset_t lcpuset, rcpuset;
CPU_ZERO(&lcpuset);
CPU_ZERO(&rcpuset);
CPU_SET(local_core, &lcpuset);
CPU_SET(remote_core, &rcpuset);
pthread_attr_init(&rattr);
pthread_attr_setaffinity_np(&rattr, sizeof(cpuset_t), &rcpuset);
pthread_attr_init(&lattr);
pthread_attr_setaffinity_np(&lattr, sizeof(cpuset_t), &lcpuset);
printf("local thread: %d numa: %d, remote: %d numa: %d\n", local_core, local_numa, remote_core, remote_numa);
pthread_create(&lthread, &lattr, local_thread, NULL);
pthread_create(&rthread, &rattr, remote_thread, NULL);
pthread_join(lthread, NULL);
uint64_t min = UINT64_MAX;
uint64_t max = 0;
uint64_t sum = 0;
for (int i = total - 1; i >= 0; i--) {
if (verbose) {
printf("%lu,\n", latencies[i]);
}
if (min > latencies[i]) {
min = latencies[i];
}
if (max < latencies[i]) {
max = latencies[i];
}
sum += latencies[i];
}
double var = 0.0;
double avg = (double)sum / (double)total;
for (int i = total - 1; i >= 0; i--) {
var += pow(latencies[i] - avg, 2);
}
var = sqrt(var / avg);
printf("Avg: %lu cycles (%lu ns)\n"
"Std: %lu cycles (%lu ns)\n"
"Min: %lu cycles (%lu ns)\n"
"Max: %lu cycles (%lu ns)\n",
(uint64_t)avg, cyc2ns((uint64_t)avg),
(uint64_t)var, cyc2ns((uint64_t)var),
min, cyc2ns(min),
max, cyc2ns(max));
free(latencies);
return 0;
}