Compare commits

10 Commits:

- a1f69bb3f8
- 7b4fc6f3ab
- 1ec01d6c37
- ff4946a699
- f2be62a9be
- 226449100d
- 82e1098f3b
- 855b9cf714
- 73c70a5c52
- 0500dc1c21
198 .clang-format
@@ -1,198 +0,0 @@
|
||||
# $FreeBSD$
|
||||
# Basic .clang-format
|
||||
---
|
||||
BasedOnStyle: WebKit
|
||||
AlignAfterOpenBracket: DontAlign
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignConsecutiveDeclarations: false
|
||||
AlignEscapedNewlines: Left
|
||||
AlignOperands: false
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: InlineOnly
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterReturnType: TopLevelDefinitions
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: MultiLine
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: WebKit
|
||||
BreakBeforeTernaryOperators: false
|
||||
# TODO: BreakStringLiterals can cause very strange formatting so turn it off?
|
||||
BreakStringLiterals: false
|
||||
# Prefer:
|
||||
# some_var = function(arg1,
|
||||
# arg2)
|
||||
# over:
|
||||
# some_var =
|
||||
# function(arg1, arg2)
|
||||
PenaltyBreakAssignment: 100
|
||||
# Prefer:
|
||||
# some_long_function(arg1, arg2
|
||||
# arg3)
|
||||
# over:
|
||||
# some_long_function(
|
||||
# arg1, arg2, arg3)
|
||||
PenaltyBreakBeforeFirstCallParameter: 100
|
||||
CompactNamespaces: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
ForEachMacros:
|
||||
- ARB_ARRFOREACH
|
||||
- ARB_ARRFOREACH_REVWCOND
|
||||
- ARB_ARRFOREACH_REVERSE
|
||||
- ARB_FOREACH
|
||||
- ARB_FOREACH_FROM
|
||||
- ARB_FOREACH_SAFE
|
||||
- ARB_FOREACH_REVERSE
|
||||
- ARB_FOREACH_REVERSE_FROM
|
||||
- ARB_FOREACH_REVERSE_SAFE
|
||||
- BIT_FOREACH_ISCLR
|
||||
- BIT_FOREACH_ISSET
|
||||
- CPU_FOREACH
|
||||
- CPU_FOREACH_ISCLR
|
||||
- CPU_FOREACH_ISSET
|
||||
- FOREACH_THREAD_IN_PROC
|
||||
- FOREACH_PROC_IN_SYSTEM
|
||||
- FOREACH_PRISON_CHILD
|
||||
- FOREACH_PRISON_DESCENDANT
|
||||
- FOREACH_PRISON_DESCENDANT_LOCKED
|
||||
- FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL
|
||||
- MNT_VNODE_FOREACH_ALL
|
||||
- MNT_VNODE_FOREACH_ACTIVE
|
||||
- RB_FOREACH
|
||||
- RB_FOREACH_FROM
|
||||
- RB_FOREACH_SAFE
|
||||
- RB_FOREACH_REVERSE
|
||||
- RB_FOREACH_REVERSE_FROM
|
||||
- RB_FOREACH_REVERSE_SAFE
|
||||
- SLIST_FOREACH
|
||||
- SLIST_FOREACH_FROM
|
||||
- SLIST_FOREACH_FROM_SAFE
|
||||
- SLIST_FOREACH_SAFE
|
||||
- SLIST_FOREACH_PREVPTR
|
||||
- SPLAY_FOREACH
|
||||
- LIST_FOREACH
|
||||
- LIST_FOREACH_FROM
|
||||
- LIST_FOREACH_FROM_SAFE
|
||||
- LIST_FOREACH_SAFE
|
||||
- STAILQ_FOREACH
|
||||
- STAILQ_FOREACH_FROM
|
||||
- STAILQ_FOREACH_FROM_SAFE
|
||||
- STAILQ_FOREACH_SAFE
|
||||
- TAILQ_FOREACH
|
||||
- TAILQ_FOREACH_FROM
|
||||
- TAILQ_FOREACH_FROM_SAFE
|
||||
- TAILQ_FOREACH_REVERSE
|
||||
- TAILQ_FOREACH_REVERSE_FROM
|
||||
- TAILQ_FOREACH_REVERSE_FROM_SAFE
|
||||
- TAILQ_FOREACH_REVERSE_SAFE
|
||||
- TAILQ_FOREACH_SAFE
|
||||
- VM_MAP_ENTRY_FOREACH
|
||||
- VM_PAGE_DUMP_FOREACH
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: None
|
||||
Language: Cpp
|
||||
NamespaceIndentation: None
|
||||
PointerAlignment: Right
|
||||
ContinuationIndentWidth: 4
|
||||
IndentWidth: 8
|
||||
TabWidth: 8
|
||||
ColumnLimit: 80
|
||||
UseTab: Always
|
||||
SpaceAfterCStyleCast: false
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^\"opt_.*\.h\"'
|
||||
Priority: 1
|
||||
SortPriority: 10
|
||||
- Regex: '^<sys/cdefs\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 20
|
||||
- Regex: '^<sys/types\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 21
|
||||
- Regex: '^<sys/param\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 22
|
||||
- Regex: '^<sys/systm\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 23
|
||||
- Regex: '^<sys.*/'
|
||||
Priority: 2
|
||||
SortPriority: 24
|
||||
- Regex: '^<vm/vm\.h>'
|
||||
Priority: 3
|
||||
SortPriority: 30
|
||||
- Regex: '^<vm/'
|
||||
Priority: 3
|
||||
SortPriority: 31
|
||||
- Regex: '^<machine/'
|
||||
Priority: 4
|
||||
SortPriority: 40
|
||||
- Regex: '^<(x86|amd64|i386|xen)/'
|
||||
Priority: 5
|
||||
SortPriority: 50
|
||||
- Regex: '^<dev/'
|
||||
Priority: 6
|
||||
SortPriority: 60
|
||||
- Regex: '^<net.*/'
|
||||
Priority: 7
|
||||
SortPriority: 70
|
||||
- Regex: '^<protocols/'
|
||||
Priority: 7
|
||||
SortPriority: 71
|
||||
- Regex: '^<(fs|nfs(|client|server)|ufs)/'
|
||||
Priority: 8
|
||||
SortPriority: 80
|
||||
- Regex: '^<[^/].*\.h'
|
||||
Priority: 9
|
||||
SortPriority: 90
|
||||
- Regex: '^\".*\.h\"'
|
||||
Priority: 10
|
||||
SortPriority: 100
|
||||
# LLVM's header include ordering style is almost the exact opposite of ours.
|
||||
# Unfortunately, they have hard-coded their preferences into clang-format.
|
||||
# Clobbering this regular expression to avoid matching prevents non-system
|
||||
# headers from being forcibly moved to the top of the include list.
|
||||
# http://llvm.org/docs/CodingStandards.html#include-style
|
||||
IncludeIsMainRegex: 'BLAH_DONT_MATCH_ANYTHING'
|
||||
SortIncludes: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
TypenameMacros:
|
||||
- ARB_ELMTYPE
|
||||
- ARB_HEAD
|
||||
- ARB8_HEAD
|
||||
- ARB16_HEAD
|
||||
- ARB32_HEAD
|
||||
- ARB_ENTRY
|
||||
- ARB8_ENTRY
|
||||
- ARB16_ENTRY
|
||||
- ARB32_ENTRY
|
||||
- LIST_CLASS_ENTRY
|
||||
- LIST_CLASS_HEAD
|
||||
- LIST_ENTRY
|
||||
- LIST_HEAD
|
||||
- QUEUE_TYPEOF
|
||||
- RB_ENTRY
|
||||
- RB_HEAD
|
||||
- SLIST_CLASS_HEAD
|
||||
- SLIST_CLASS_ENTRY
|
||||
- SLIST_HEAD
|
||||
- SLIST_ENTRY
|
||||
- SMR_POINTER
|
||||
- SPLAY_ENTRY
|
||||
- SPLAY_HEAD
|
||||
- STAILQ_CLASS_ENTRY
|
||||
- STAILQ_CLASS_HEAD
|
||||
- STAILQ_ENTRY
|
||||
- STAILQ_HEAD
|
||||
- TAILQ_CLASS_ENTRY
|
||||
- TAILQ_CLASS_HEAD
|
||||
- TAILQ_ENTRY
|
||||
- TAILQ_HEAD
|
@@ -1 +0,0 @@
Checks: "-*,clang-diagnostic-*,clang-analyzer-*,modernize*,performance*,-modernize-use-trailing-return-type,-modernize-avoid-c-arrays"
5 .gitignore vendored
@@ -268,7 +268,4 @@ cython_debug/
# Executables
*.exe
*.out
*.app

*.clangd
compile_commands.json
*.app
0 .gitmodules vendored
102 CMakeLists.txt
@@ -1,86 +1,58 @@
cmake_minimum_required(VERSION 3.0)

find_program(CC_GCC gcc)
find_program(CXX_GCC g++)

set(CMAKE_C_COMPILER ${CC_GCC})
set(CMAKE_CXX_COMPILER ${CXX_GCC})

project(khat)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")
find_package(PkgConfig REQUIRED)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY bin)

pkg_check_modules(DPDK libdpdk)
pkg_check_modules(SPDK spdk_event_bdev spdk_env_dpdk)
pkg_check_modules(SPDK_SYS spdk_syslibs)
pkg_check_modules(UUID uuid)
pkg_check_modules(TOPO bsdtopo)
find_package(dpdk REQUIRED)
find_package(Hwloc REQUIRED)

set(CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11
    -Wno-deprecated-declarations
    -Wno-packed-not-aligned
    -Wno-address-of-packed-member
    -Wno-zero-length-array
    -Wno-gnu-zero-variadic-macro-arguments
    -march=native)

set(C_FLAGS -O2 -g -Wall -Wextra -Werror -std=c2x
    -Wno-deprecated-declarations
    -Wno-address-of-packed-member
    -Wno-zero-length-array
    -Wno-gnu-zero-variadic-macro-arguments
    -march=native)
    -msse4
    -mavx)


include_directories(${CMAKE_SOURCE_DIR}/inc)
include_directories()
include_directories(${dpdk_INCLUDE_DIRS})
include_directories(${Hwloc_INCLUDE_DIRS})

set(LIBNTR_C_FLAGS -O3 -g -Wall -Wextra -Werror -std=c2x)
set(LIBGEN_CC_FLAGS -O3 -g -Wall -Wextra -Werror -std=c++17)
set(LIBNM_CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11)
set(LIBNTR_C_FLAGS -O2 -g -Wall -Wextra -Werror -std=c11)
set(LIBGEN_CC_FLAGS -O2 -g -Wall -Wextra -Werror -std=c++11)

add_library(ntr SHARED libntr/ntr.c)
set(KHAT_LINKLIBS pthread nm ntr)
set(CAT_LINKLIBS pthread nm ntr gen)
set(RAT_LINKLIBS pthread nm ntr gen)

add_library(nm libnm/nm.cc)
target_link_libraries(nm ${Hwloc_LIBRARIES})
target_compile_options(nm PRIVATE ${LIBNM_CC_FLAGS})

add_library(ntr libntr/ntr.c)
target_compile_options(ntr PRIVATE ${LIBNTR_C_FLAGS})

add_library(gen SHARED libgen/generator.cc libgen/loadgen.cc)
target_link_libraries(gen PRIVATE pthread ntr ${TOPO_LINK_LIBRARIES} nms)
target_compile_options(gen PRIVATE ${LIBGEN_CC_FLAGS} ${TOPO_CFLAGS})
add_library(gen libgen/generator.cc)
target_link_libraries(gen ${Hwloc_LIBRARIES})
target_compile_options(gen PRIVATE ${LIBGEN_CC_FLAGS})

add_library(netsup SHARED net/libnetsup/dpdk.cc net/libnetsup/portconf.cc)
target_link_libraries(netsup PRIVATE ntr ${DPDK_LINK_LIBRARIES})
target_compile_options(netsup PRIVATE ${LIBGEN_CC_FLAGS} ${DPDK_CFLAGS})
add_executable(khat khat/khat.cc)
target_link_libraries(khat ${dpdk_LIBRARIES} ${KHAT_LINKLIBS})
target_compile_options(khat PRIVATE ${CC_FLAGS})

add_library(nms SHARED libnms/alloc.c)
target_link_libraries(nms PRIVATE ${TOPO_LINK_LIBRARIES})
target_compile_options(nms PRIVATE ${TOPO_CFLAGS})
add_executable(cat cat/cat.cc)
target_link_libraries(cat ${dpdk_LIBRARIES} ${CAT_LINKLIBS})
target_compile_options(cat PRIVATE ${CC_FLAGS})

add_executable(khat EXCLUDE_FROM_ALL net/khat.cc)
target_link_libraries(khat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(khat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})

add_executable(cat EXCLUDE_FROM_ALL net/cat.cc)
target_link_libraries(cat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(cat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})

add_executable(rat EXCLUDE_FROM_ALL net/rat.cc)
target_link_libraries(rat PRIVATE pthread ntr gen netsup nms ${DPDK_LINK_LIBRARIES} ${TOPO_LINK_LIBRARIES})
target_compile_options(rat PRIVATE ${CC_FLAGS} ${DPDK_CFLAGS} ${TOPO_CFLAGS})

add_executable(birb EXCLUDE_FROM_ALL storage/birb.cc storage/io_gen.cc storage/drivers/bdev.cc storage/drivers/bdev_thread.cc storage/drivers/nvme.cc storage/drivers/nvme_thread.cc)
target_include_directories(birb PRIVATE ${SPDK_INCLUDE_DIRS} ${DPDK_INCLUDE_DIRS} ${UUID_INCLUDE_DIRS})
target_compile_options(birb PRIVATE ${CC_FLAGS} ${SPDK_CFLAGS} ${UUID_CFLAGS})
target_link_directories(birb PRIVATE ${SPDK_LIBRARY_DIRS} ${SPDK_SYS_STATIC_LIBRARY_DIRS} ${UUID_LIBRARY_DIRS})
target_link_libraries(birb PRIVATE pthread ntr gen -Wl,--whole-archive ${SPDK_LIBRARIES} -Wl,--no-whole-archive ${SPDK_SYS_STATIC_LIBRARIES})

add_executable(birb_posix EXCLUDE_FROM_ALL storage/birb_posix.cc storage/io_gen.cc)
target_compile_options(birb_posix PRIVATE ${CC_FLAGS})
target_link_libraries(birb_posix PRIVATE pthread ntr gen)

add_executable(memloadgen util/memloadgen.cc)
target_link_libraries(memloadgen PRIVATE pthread gen ntr nms ${TOPO_LINK_LIBRARIES})
target_compile_options(memloadgen PRIVATE ${CC_FLAGS} ${TOPO_CFLAGS})

add_executable(mornafah util/mornafah.c)
target_link_libraries(mornafah PRIVATE pthread gen ntr nms ${TOPO_LINK_LIBRARIES})
target_compile_options(mornafah PRIVATE ${C_FLAGS} ${TOPO_CFLAGS})

add_executable(nms_test tests/nms_test.c)
set_target_properties(nms_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests)
target_link_libraries(nms_test PRIVATE nms)
target_compile_options(nms_test PRIVATE ${C_FLAGS})
add_executable(rat rat/rat.cc)
target_link_libraries(rat ${dpdk_LIBRARIES} ${RAT_LINKLIBS})
target_compile_options(rat PRIVATE ${CC_FLAGS})
213 FindHwloc.cmake Normal file
@@ -0,0 +1,213 @@
|
||||
#.rst:
|
||||
# FindHwloc
|
||||
# ----------
|
||||
#
|
||||
# Try to find Portable Hardware Locality (hwloc) libraries.
|
||||
# http://www.open-mpi.org/software/hwloc
|
||||
#
|
||||
# You may declare HWLOC_ROOT environment variable to tell where
|
||||
# your hwloc library is installed.
|
||||
#
|
||||
# Once done this will define::
|
||||
#
|
||||
# Hwloc_FOUND - True if hwloc was found
|
||||
# Hwloc_INCLUDE_DIRS - include directories for hwloc
|
||||
# Hwloc_LIBRARIES - link against these libraries to use hwloc
|
||||
# Hwloc_VERSION - version
|
||||
# Hwloc_CFLAGS - include directories as compiler flags
|
||||
# Hwloc_LDFLAGS - link paths and libs as compiler flags
|
||||
#
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2014 Mikael Lepistö
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
|
||||
if(WIN32)
|
||||
find_path(Hwloc_INCLUDE_DIR
|
||||
NAMES
|
||||
hwloc.h
|
||||
PATHS
|
||||
ENV "PROGRAMFILES(X86)"
|
||||
ENV HWLOC_ROOT
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
)
|
||||
|
||||
find_library(Hwloc_LIBRARY
|
||||
NAMES
|
||||
libhwloc.lib
|
||||
PATHS
|
||||
ENV "PROGRAMFILES(X86)"
|
||||
ENV HWLOC_ROOT
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
#
|
||||
# Check if the found library can be used to linking
|
||||
#
|
||||
SET (_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/linktest.c")
|
||||
FILE (WRITE "${_TEST_SOURCE}"
|
||||
"
|
||||
#include <hwloc.h>
|
||||
int main()
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
int nbcores;
|
||||
hwloc_topology_init(&topology);
|
||||
hwloc_topology_load(topology);
|
||||
nbcores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
|
||||
hwloc_topology_destroy(topology);
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
)
|
||||
|
||||
TRY_COMPILE(_LINK_SUCCESS ${CMAKE_BINARY_DIR} "${_TEST_SOURCE}"
|
||||
CMAKE_FLAGS
|
||||
"-DINCLUDE_DIRECTORIES:STRING=${Hwloc_INCLUDE_DIR}"
|
||||
CMAKE_FLAGS
|
||||
"-DLINK_LIBRARIES:STRING=${Hwloc_LIBRARY}"
|
||||
)
|
||||
|
||||
IF(NOT _LINK_SUCCESS)
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
message(STATUS "You are building 64bit target.")
|
||||
ELSE()
|
||||
message(STATUS "You are building 32bit code. If you like to build x64 use e.g. -G 'Visual Studio 12 Win64' generator." )
|
||||
ENDIF()
|
||||
message(FATAL_ERROR "Library found, but linking test program failed.")
|
||||
ENDIF()
|
||||
|
||||
#
|
||||
# Resolve version if some compiled binary found...
|
||||
#
|
||||
find_program(HWLOC_INFO_EXECUTABLE
|
||||
NAMES
|
||||
hwloc-info
|
||||
PATHS
|
||||
ENV HWLOC_ROOT
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
)
|
||||
|
||||
if(HWLOC_INFO_EXECUTABLE)
|
||||
execute_process(
|
||||
COMMAND ${HWLOC_INFO_EXECUTABLE} "--version"
|
||||
OUTPUT_VARIABLE HWLOC_VERSION_LINE
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
string(REGEX MATCH "([0-9]+.[0-9]+)$"
|
||||
Hwloc_VERSION "${HWLOC_VERSION_LINE}")
|
||||
unset(HWLOC_VERSION_LINE)
|
||||
endif()
|
||||
|
||||
#
|
||||
# All good
|
||||
#
|
||||
|
||||
set(Hwloc_LIBRARIES ${Hwloc_LIBRARY})
|
||||
set(Hwloc_INCLUDE_DIRS ${Hwloc_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(
|
||||
Hwloc
|
||||
FOUND_VAR Hwloc_FOUND
|
||||
REQUIRED_VARS Hwloc_LIBRARY Hwloc_INCLUDE_DIR Hwloc_VERSION_PARSED Hwloc_VERSION_MAJOR Hwloc_VERSION_MINOR
|
||||
VERSION_VAR Hwloc_VERSION)
|
||||
|
||||
mark_as_advanced(
|
||||
Hwloc_INCLUDE_DIR
|
||||
Hwloc_LIBRARY)
|
||||
|
||||
foreach(arg ${Hwloc_INCLUDE_DIRS})
|
||||
set(Hwloc_CFLAGS "${Hwloc_CFLAGS} /I${arg}")
|
||||
endforeach()
|
||||
|
||||
set(Hwloc_LDFLAGS "${Hwloc_LIBRARY}")
|
||||
|
||||
else()
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
|
||||
find_path(Hwloc_INCLUDE_DIRS
|
||||
NAMES
|
||||
hwloc.h
|
||||
PATHS
|
||||
ENV HWLOC_ROOT
|
||||
)
|
||||
|
||||
find_library(Hwloc_LIBRARIES
|
||||
NAMES
|
||||
hwloc
|
||||
PATHS
|
||||
ENV HWLOC_ROOT
|
||||
)
|
||||
|
||||
if(Hwloc_INCLUDE_DIRS AND Hwloc_LIBRARIES)
|
||||
message(WARNING "HWLOC library found using find_library() - cannot determine version. Assuming 1.7.0")
|
||||
set(Hwloc_FOUND 1)
|
||||
set(Hwloc_VERSION "1.7.0")
|
||||
endif()
|
||||
|
||||
else() # Find with pkgconfig for non-crosscompile builds
|
||||
|
||||
find_package(PkgConfig)
|
||||
|
||||
if(HWLOC_ROOT)
|
||||
set(ENV{PKG_CONFIG_PATH} "${HWLOC_ROOT}/lib/pkgconfig")
|
||||
else()
|
||||
foreach(PREFIX ${CMAKE_PREFIX_PATH})
|
||||
set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:${PREFIX}/lib/pkgconfig")
|
||||
endforeach()
|
||||
set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}")
|
||||
endif()
|
||||
|
||||
if(hwloc_FIND_REQUIRED)
|
||||
set(_hwloc_OPTS "REQUIRED")
|
||||
elseif(hwloc_FIND_QUIETLY)
|
||||
set(_hwloc_OPTS "QUIET")
|
||||
else()
|
||||
set(_hwloc_output 1)
|
||||
endif()
|
||||
|
||||
if(hwloc_FIND_VERSION)
|
||||
if(hwloc_FIND_VERSION_EXACT)
|
||||
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc=${hwloc_FIND_VERSION})
|
||||
else()
|
||||
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc>=${hwloc_FIND_VERSION})
|
||||
endif()
|
||||
else()
|
||||
pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc)
|
||||
endif()
|
||||
|
||||
if(Hwloc_FOUND)
|
||||
string(REPLACE "." ";" Hwloc_VERSION_PARSED "${Hwloc_VERSION}")
|
||||
set(Hwloc_VERSION "${Hwloc_VERSION}" CACHE STRING "version of Hwloc as a list")
|
||||
list(GET Hwloc_VERSION_PARSED 0 Hwloc_VERSION_MAJOR)
|
||||
set(Hwloc_VERSION_MAJOR "${Hwloc_VERSION_MAJOR}" CACHE STRING "Major version of Hwloc")
|
||||
list(GET Hwloc_VERSION_PARSED 1 Hwloc_VERSION_MINOR)
|
||||
set(Hwloc_VERSION_MINOR "${Hwloc_VERSION_MINOR}" CACHE STRING "Minor version of Hwloc")
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Hwloc DEFAULT_MSG Hwloc_LIBRARIES)
|
||||
|
||||
if(NOT ${Hwloc_VERSION} VERSION_LESS 1.7.0)
|
||||
set(Hwloc_GL_FOUND 1)
|
||||
endif()
|
||||
|
||||
if(_hwloc_output)
|
||||
message(STATUS
|
||||
"Found hwloc ${Hwloc_VERSION} in ${Hwloc_INCLUDE_DIRS}:${Hwloc_LIBRARIES}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif() # cross-compile else
|
||||
|
||||
endif()
|
142 Finddpdk.cmake Normal file
@@ -0,0 +1,142 @@
# Try to find dpdk
#
# Once done, this will define
#
# dpdk::dpdk
# dpdk_FOUND
# dpdk_INCLUDE_DIR
# dpdk_LIBRARIES

find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
  pkg_check_modules(dpdk QUIET libdpdk)
endif()

if(dpdk_INCLUDE_DIRS)
  # good
elseif(TARGET dpdk::dpdk)
  get_target_property(dpdk_INCLUDE_DIRS
    dpdk::dpdk INTERFACE_INCLUDE_DIRECTORIES)
else()
  find_path(dpdk_config_INCLUDE_DIR rte_config.h
    HINTS
      ENV DPDK_DIR
    PATH_SUFFIXES
      dpdk
      include)
  find_path(dpdk_common_INCLUDE_DIR rte_common.h
    HINTS
      ENV DPDK_DIR
    PATH_SUFFIXES
      dpdk
      include)
  set(dpdk_INCLUDE_DIRS "${dpdk_config_INCLUDE_DIR}")
  if(NOT dpdk_config_INCLUDE_DIR EQUAL dpdk_common_INCLUDE_DIR)
    list(APPEND dpdk_INCLUDE_DIRS "${dpdk_common_INCLUDE_DIR}")
  endif()
endif()

set(components
  bus_pci
  bus_vdev
  cfgfile
  cmdline
  eal
  ethdev
  hash
  kvargs
  mbuf
  mempool
  mempool_ring
  mempool_stack
  net
  pci
  pmd_af_packet
  pmd_bnxt
  pmd_bond
  pmd_cxgbe
  pmd_e1000
  pmd_ena
  pmd_enic
  pmd_i40e
  pmd_ixgbe
  pmd_mlx5
  pmd_nfp
  pmd_qede
  pmd_ring
  pmd_sfc_efx
  pmd_vmxnet3_uio
  ring
  timer)

# for collecting dpdk library targets, it will be used when defining dpdk::dpdk
set(_dpdk_libs)
# for list of dpdk library archive paths
set(dpdk_LIBRARIES)

foreach(c ${components})
  set(dpdk_lib dpdk::${c})
  if(TARGET ${dpdk_lib})
    get_target_property(DPDK_rte_${c}_LIBRARY
      ${dpdk_lib} IMPORTED_LOCATION)
  else()
    find_library(DPDK_rte_${c}_LIBRARY rte_${c}
      HINTS
        ENV DPDK_DIR
        ${dpdk_LIBRARY_DIRS}
      PATH_SUFFIXES lib)
  endif()
  if(DPDK_rte_${c}_LIBRARY)
    if (NOT TARGET ${dpdk_lib})
      add_library(${dpdk_lib} UNKNOWN IMPORTED)
      set_target_properties(${dpdk_lib} PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${dpdk_INCLUDE_DIRS}"
        IMPORTED_LOCATION "${DPDK_rte_${c}_LIBRARY}")
      if(c STREQUAL pmd_mlx5)
        find_package(verbs QUIET)
        if(verbs_FOUND)
          target_link_libraries(${dpdk_lib} INTERFACE IBVerbs::verbs)
        endif()
      endif()
    endif()
    list(APPEND _dpdk_libs ${dpdk_lib})
    list(APPEND dpdk_LIBRARIES ${DPDK_rte_${c}_LIBRARY})
  endif()
endforeach()

mark_as_advanced(dpdk_INCLUDE_DIRS ${dpdk_LIBRARIES})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(dpdk DEFAULT_MSG
  dpdk_INCLUDE_DIRS
  dpdk_LIBRARIES)

if(dpdk_FOUND)
  if(NOT TARGET dpdk::cflags)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64")
      set(rte_cflags "-march=core2")
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|ARM")
      set(rte_cflags "-march=armv7-a")
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
      set(rte_cflags "-march=armv8-a+crc")
    endif()
    add_library(dpdk::cflags INTERFACE IMPORTED)
    if (rte_cflags)
      set_target_properties(dpdk::cflags PROPERTIES
        INTERFACE_COMPILE_OPTIONS "${rte_cflags}")
    endif()
  endif()

  if(NOT TARGET dpdk::dpdk)
    add_library(dpdk::dpdk INTERFACE IMPORTED)
    find_package(Threads QUIET)
    list(APPEND _dpdk_libs
      Threads::Threads
      dpdk::cflags)
    set_target_properties(dpdk::dpdk PROPERTIES
      INTERFACE_LINK_LIBRARIES "${_dpdk_libs}"
      INTERFACE_INCLUDE_DIRECTORIES "${dpdk_INCLUDE_DIRS}")
  endif()
endif()

unset(_dpdk_libs)
621 cat/cat.cc Normal file
@@ -0,0 +1,621 @@
|
||||
#include <cstdio>
|
||||
#include <ctime>
|
||||
#include <netinet/in.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_log.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_ip.h>
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "nm.h"
|
||||
#include "gen.h"
|
||||
#include "ntr.h"
|
||||
#include "pkt.h"
|
||||
#include "util.h"
|
||||
|
||||
constexpr static unsigned int MBUF_MAX_COUNT = 16384;
|
||||
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
|
||||
constexpr static unsigned int RX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int TX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
|
||||
static const struct rte_eth_conf port_conf_default{};
|
||||
|
||||
struct datapt {
|
||||
uint32_t epoch;
|
||||
uint32_t valid;
|
||||
uint64_t clt_hw_tx;
|
||||
uint64_t clt_sw_tx;
|
||||
uint64_t clt_hw_rx;
|
||||
uint64_t clt_sw_rx;
|
||||
uint64_t srv_hw_tx;
|
||||
uint64_t srv_sw_tx;
|
||||
uint64_t srv_hw_rx;
|
||||
uint64_t srv_sw_rx;
|
||||
};
|
||||
|
||||
struct options_t {
|
||||
// parameters
|
||||
unsigned int run_time{5};
|
||||
unsigned int warmup_time{3};
|
||||
char output[256] = "output.txt";
|
||||
char ia_gen_str[256] = "fixed:0.01";
|
||||
struct rte_ether_addr server_mac;
|
||||
uint64_t cpu_mask{0x2}; // 2nd core
|
||||
std::vector<struct rte_ether_addr *> slaves;
|
||||
unsigned long rage_quit_time = (unsigned long)-1;
|
||||
unsigned long last_sent_ts = 0;
|
||||
|
||||
// states
|
||||
struct rte_mempool * mbuf_pool;
|
||||
struct rte_ether_addr s_host_mac;
|
||||
uint16_t s_portid;
|
||||
unsigned int s_rxqid;
|
||||
unsigned int s_txqid;
|
||||
unsigned int s_total_pkts{0};
|
||||
Generator * s_iagen{nullptr};
|
||||
std::vector<struct datapt *> s_data;
|
||||
struct datapt * s_last_datapt{nullptr};
|
||||
uint32_t s_epoch;
|
||||
std::atomic<bool> s_stop {false};
|
||||
std::atomic<uint32_t> s_record {0};
|
||||
};
|
||||
|
||||
static struct options_t options;
|
||||
|
||||
static uint16_t
|
||||
rx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = rte_rdtsc();
|
||||
struct pkt_hdr * pkt_data;
|
||||
struct timespec ts;
|
||||
int ret;
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i]);
|
||||
|
||||
if (pkt_data == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: ignoring invalid packet 0x%p.\n", (void*)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
|
||||
uint32_t epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);
|
||||
if (options.s_last_datapt != nullptr && options.s_last_datapt->epoch == epoch) {
|
||||
if ((ret = rte_eth_timesync_read_rx_timestamp(port, &ts, pkts[i]->timesync & 0x3)) == 0) {
|
||||
// has hw rx timestamp
|
||||
options.s_last_datapt->clt_hw_rx = ts.tv_sec * S2NS + ts.tv_nsec;
|
||||
options.s_last_datapt->clt_sw_rx = now;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: tagged packet %p with sw: %llu hw: %llu.\n", (void*)pkts[i], now, options.s_last_datapt->clt_hw_rx);
|
||||
} else {
|
||||
rte_exit(EXIT_FAILURE, "rx_add_timestamp: packet %p not tagged - hw ts not available - %d.\n", (void*)pkts[i], ret);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p epoch %d != last epoch %d.\n", (void*)pkts[i], epoch, options.s_last_datapt->epoch);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = rte_rdtsc();
|
||||
struct pkt_hdr * pkt_data;
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i]);
|
||||
|
||||
if (pkt_data == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: ignoring invalid packet 0x%p.\n", (void*)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
|
||||
uint32_t epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr || epoch != options.s_last_datapt->epoch) {
|
||||
rte_exit(EXIT_FAILURE, "tx_add_timestamp: packet epoch %d != last epoch %d\n", epoch, options.s_last_datapt->epoch);
|
||||
}
|
||||
|
||||
options.s_last_datapt->clt_sw_tx = now;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: tagged packet %p with sw: %llu.\n", (void*)pkts[i], now);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], pkt_data->type);
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static int
|
||||
locore_main(void * tif __rte_unused)
|
||||
{
|
||||
struct rte_mbuf *tx_buf;
|
||||
struct rte_mbuf *rx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
uint32_t core_id = rte_lcore_id();
|
||||
int32_t ret;
|
||||
|
||||
bool read_tx = true;
|
||||
bool recv_stat = true;
|
||||
bool recv_resp = true;
|
||||
|
||||
uint64_t next_ts;
|
||||
// XXX: check link status instead
|
||||
|
||||
sleep(1);
|
||||
if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n", options.s_portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d running...\n", core_id);
|
||||
|
||||
next_ts = get_time_us();
|
||||
|
||||
while(!options.s_stop.load()) {
|
||||
uint64_t now = get_time_us();
|
||||
// always pop incoming packets
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, 0, rx_bufs, BURST_SIZE);
|
||||
|
||||
if (nb_rx > 0) {
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
struct pkt_hdr * each = check_valid_packet(rx_bufs[i]);
|
||||
|
||||
if (each == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: ignoring invalid packet %p.\n", (void*)rx_bufs[i]);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t type = rte_be_to_cpu_16(each->type);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: received packet %p type %d.\n", (void*)rx_bufs[i], type);
|
||||
switch (type) {
|
||||
struct pkt_payload_epoch * pld_epoch;
|
||||
struct pkt_payload_stat * pld_stat;
|
||||
uint32_t epoch;
|
||||
|
||||
case PKT_TYPE_PROBE_RESP:
|
||||
pld_epoch = (struct pkt_payload_epoch *)each->payload;
|
||||
epoch = rte_be_to_cpu_32(pld_epoch->epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr || epoch != options.s_last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, options.s_last_datapt->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
options.s_total_pkts++;
|
||||
|
||||
recv_resp = true;
|
||||
break;
|
||||
case PKT_TYPE_STAT:
|
||||
pld_stat = (struct pkt_payload_stat *)each->payload;
|
||||
epoch = rte_be_to_cpu_32(pld_stat->epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr || epoch != options.s_last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, options.s_last_datapt->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
options.s_last_datapt->srv_hw_tx = rte_be_to_cpu_64(pld_stat->hw_tx);
|
||||
options.s_last_datapt->srv_hw_rx = rte_be_to_cpu_64(pld_stat->hw_rx);
|
||||
options.s_last_datapt->srv_sw_tx = rte_be_to_cpu_64(pld_stat->sw_tx);
|
||||
options.s_last_datapt->srv_sw_rx = rte_be_to_cpu_64(pld_stat->sw_rx);
|
||||
|
||||
recv_stat = true;
|
||||
break;
|
||||
default:
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: ignoring packet %p with unknown type %d.\n", (void*)rx_bufs[i], type);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (read_tx && recv_stat && recv_resp) {
|
||||
// if we have all the data
|
||||
|
||||
if (options.s_last_datapt != nullptr) {
|
||||
// push the data to the queue if we haven't done so already
|
||||
options.s_data.push_back(options.s_last_datapt);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: datapt for epoch %d dump:\n" \
|
||||
" Valid: %d\n"
|
||||
" client TX HW: %llu\n" \
|
||||
" client TX SW: %llu\n" \
|
||||
" client RX HW: %llu\n" \
|
||||
" client RX SW: %llu\n" \
|
||||
" server TX HW: %llu\n" \
|
||||
" server TX SW: %llu\n" \
|
||||
" server RX HW: %llu\n" \
|
||||
" server RX SW: %llu\n\n",
|
||||
options.s_last_datapt->epoch,
|
||||
options.s_last_datapt->valid,
|
||||
options.s_last_datapt->clt_hw_tx,
|
||||
options.s_last_datapt->clt_sw_tx,
|
||||
options.s_last_datapt->clt_hw_rx,
|
||||
options.s_last_datapt->clt_sw_rx,
|
||||
options.s_last_datapt->srv_hw_tx,
|
||||
options.s_last_datapt->srv_sw_tx,
|
||||
options.s_last_datapt->srv_hw_rx,
|
||||
options.s_last_datapt->srv_sw_rx);
|
||||
options.s_last_datapt = nullptr;
|
||||
}
|
||||
|
||||
if (now >= next_ts) {
|
||||
struct pkt_payload_epoch * pld_epoch;
|
||||
uint32_t epoch;
|
||||
|
||||
next_ts += (int)(options.s_iagen->generate() * 1000000.0);
|
||||
|
||||
// generate the packet
|
||||
tx_buf = rte_pktmbuf_alloc(options.mbuf_pool);
|
||||
|
||||
if (tx_buf == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "cannot allocate tx_buf\n");
|
||||
}
|
||||
|
||||
pkt_data = construct_pkt_hdr(tx_buf, PKT_TYPE_PROBE,
|
||||
&options.s_host_mac, &options.server_mac);
|
||||
if (pkt_data == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "cannot allocate space for packet_data in mbuf\n");
|
||||
}
|
||||
|
||||
epoch = options.s_epoch;
|
||||
options.s_epoch++;
|
||||
pld_epoch = (struct pkt_payload_epoch *)pkt_data->payload;
|
||||
pld_epoch->epoch = rte_cpu_to_be_32(epoch);
|
||||
options.s_last_datapt = new struct datapt;
|
||||
options.s_last_datapt->epoch = epoch;
|
||||
options.s_last_datapt->valid = options.s_record.load();
|
||||
|
||||
read_tx = false;
|
||||
recv_resp = false;
|
||||
recv_stat = false;
|
||||
options.last_sent_ts = get_time_us();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: sending packet %p with epoch %d\n", (void*)tx_buf, epoch);
|
||||
const uint16_t nb_tx = rte_eth_tx_burst(options.s_portid, options.s_txqid, &tx_buf, 1);
|
||||
|
||||
if (nb_tx != 1) {
|
||||
rte_exit(EXIT_FAILURE, "failed to send packet 0x%p, epoch %d\n", (void*)tx_buf, epoch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!recv_stat) {
|
||||
// if we haven't received the stats, get ready to rage quit
|
||||
if(get_time_us() - options.last_sent_ts > options.rage_quit_time * 1000) {
|
||||
rte_exit(EXIT_FAILURE, "waiting too long for resp. I QUIT!!\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (!read_tx) {
|
||||
struct timespec ts;
|
||||
if ((ret = rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts)) == 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: read hw tx timestamp %lld.\n", ts.tv_nsec + ts.tv_sec * S2NS);
|
||||
options.s_last_datapt->clt_hw_tx = ts.tv_nsec + ts.tv_sec * S2NS;
|
||||
read_tx = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(tx_buf);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d successfully stopped.\n", core_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
|
||||
{
|
||||
struct rte_eth_dev_info dev_info;
|
||||
struct rte_eth_conf port_conf = port_conf_default;
|
||||
struct rte_eth_txconf txconf;
|
||||
struct rte_eth_rxconf rxconf;
|
||||
|
||||
uint16_t nb_rxd = RX_RING_SIZE;
|
||||
uint16_t nb_txd = TX_RING_SIZE;
|
||||
port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
|
||||
|
||||
if(!rte_eth_dev_is_valid_port(portid)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = rte_eth_dev_info_get(portid, &dev_info);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
|
||||
|
||||
/* Configure the Ethernet device. */
|
||||
ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Allocate and set up 1 RX queue per thread . */
|
||||
rxconf = dev_info.default_rxconf;
|
||||
rxconf.offloads = port_conf.rxmode.offloads;
|
||||
for (uint32_t i = 0; i < 1; i++) {
|
||||
ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
txconf = dev_info.default_txconf;
|
||||
txconf.offloads = port_conf.txmode.offloads;
|
||||
/* Allocate and set up 1 TX queue per Ethernet port. */
|
||||
for (uint32_t i = 0; i < 1; i++) {
|
||||
ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = rte_eth_dev_start(portid);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Display the port MAC address. */
|
||||
struct rte_ether_addr addr;
|
||||
ret = rte_eth_macaddr_get(portid, &addr);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = rte_eth_timesync_enable(portid);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Enable RX in promiscuous mode for the Ethernet device. */
|
||||
ret = rte_eth_promiscuous_enable(portid);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
rte_eth_add_tx_callback(portid, 0, tx_add_timestamp, NULL);
|
||||
rte_eth_add_rx_callback(portid, 0, rx_add_timestamp, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dump_options()
|
||||
{
|
||||
fprintf(stdout, "Configuration:\n" \
|
||||
" run time = %d\n" \
|
||||
" warmup time = %d\n" \
|
||||
" output file = %s\n" \
|
||||
" rage quit time = %ld\n"\
|
||||
" host MAC = %x:%x:%x:%x:%x:%x\n",
|
||||
options.run_time,
|
||||
options.warmup_time,
|
||||
options.output,
|
||||
options.rage_quit_time,
|
||||
options.server_mac.addr_bytes[0],
|
||||
options.server_mac.addr_bytes[1],
|
||||
options.server_mac.addr_bytes[2],
|
||||
options.server_mac.addr_bytes[3],
|
||||
options.server_mac.addr_bytes[4],
|
||||
options.server_mac.addr_bytes[5]);
|
||||
}
|
||||
|
||||
static void usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n " \
|
||||
" -v(vv): verbose mode\n" \
|
||||
" -s: server's mac\n" \
|
||||
" -S: slave(rat)'s mac\n" \
|
||||
" -t: run time\n" \
|
||||
" -T: warmup time\n" \
|
||||
" -h: display the information\n" \
|
||||
" -o: output filename\n" \
|
||||
" -A: affinity mask\n" \
|
||||
" -i: inter-arrival time distribution\n" \
|
||||
" -r: rage quit time (in ms)\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
unsigned int nb_ports;
|
||||
struct rte_mempool *mbuf_pool;
|
||||
std::ofstream log_file;
|
||||
|
||||
ntr_init();
|
||||
if (nm_init() != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to init libnm\n");
|
||||
|
||||
// create default generator
|
||||
options.s_iagen = createGenerator(options.ia_gen_str);
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while((c = getopt(argc, argv, "vs:S:t:T:ho:A:i:r:")) != -1) {
|
||||
switch (c) {
|
||||
struct rte_ether_addr * addr;
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 's':
|
||||
if (rte_ether_unformat_addr(optarg, &options.server_mac) == -1) {
|
||||
rte_exit(EXIT_FAILURE, "cannot parse %s as mac address.\n", optarg);
|
||||
}
|
||||
break;
|
||||
case 'S':
|
||||
addr = new struct rte_ether_addr;
|
||||
if (rte_ether_unformat_addr(optarg, addr) == -1) {
|
||||
rte_exit(EXIT_FAILURE, "cannot parse %s as mac address.\n", optarg);
|
||||
}
|
||||
options.slaves.push_back(addr);
|
||||
break;
|
||||
case 't':
|
||||
options.run_time = atoi(optarg);
|
||||
break;
|
||||
case 'T':
|
||||
options.warmup_time = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "\n");
|
||||
case 'o':
|
||||
strncpy(options.output, optarg, sizeof(options.output) - 1);
|
||||
break;
|
||||
case 'A':
|
||||
options.cpu_mask = strtoull(optarg, nullptr, 16);
|
||||
break;
|
||||
case 'i':
|
||||
strncpy(options.ia_gen_str, optarg, sizeof(options.ia_gen_str) - 1);
|
||||
if (options.s_iagen != nullptr) {
|
||||
delete options.s_iagen;
|
||||
}
|
||||
options.s_iagen = createGenerator(options.ia_gen_str);
|
||||
if (options.s_iagen == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "invalid generator string %s\n", options.ia_gen_str);
|
||||
}
|
||||
break;
|
||||
case 'r':
|
||||
options.rage_quit_time = atoi(optarg);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_FAILURE, "unknown argument: %c\n", c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// open log file for writing
|
||||
log_file.open(options.output, std::ofstream::out);
|
||||
if (!log_file) {
|
||||
rte_exit(EXIT_FAILURE, "failed to open log file %s\n", options.output);
|
||||
}
|
||||
|
||||
nb_ports = rte_eth_dev_count_avail();
|
||||
if (nb_ports == 0) {
|
||||
rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
|
||||
}
|
||||
|
||||
uint16_t portid = rte_eth_find_next(0);
|
||||
if (portid == RTE_MAX_ETHPORTS) {
|
||||
rte_exit(EXIT_FAILURE, "cannot find an available port\n");
|
||||
}
|
||||
options.s_portid = portid;
|
||||
|
||||
// create a mbuf memory pool on the socket
|
||||
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(options.s_portid));
|
||||
if (mbuf_pool == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
|
||||
}
|
||||
options.mbuf_pool = mbuf_pool;
|
||||
|
||||
if (port_init(portid, mbuf_pool) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
|
||||
}
|
||||
|
||||
if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n", portid,
|
||||
options.s_host_mac.addr_bytes[0],
|
||||
options.s_host_mac.addr_bytes[1],
|
||||
options.s_host_mac.addr_bytes[2],
|
||||
options.s_host_mac.addr_bytes[3],
|
||||
options.s_host_mac.addr_bytes[4],
|
||||
options.s_host_mac.addr_bytes[5]);
|
||||
|
||||
dump_options();
|
||||
|
||||
sleep(1);
|
||||
|
||||
uint64_t cmask = options.cpu_mask;
|
||||
const int16_t core_id = cmask_get_next_cpu(&cmask);
|
||||
if (core_id == NEXT_CPU_NULL) {
|
||||
rte_exit(EXIT_FAILURE, "invalid cpu mask 0x%lx\n", cmask);
|
||||
}
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: launching thread on core %d\n", core_id);
|
||||
if (rte_eal_remote_launch(locore_main, nullptr, core_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
|
||||
}
|
||||
|
||||
// XXX: poor man's timer
|
||||
uint32_t second = 0;
|
||||
while(true) {
|
||||
if (second >= options.warmup_time) {
|
||||
options.s_record.store(1);
|
||||
}
|
||||
if (second >= options.run_time + options.warmup_time) {
|
||||
options.s_stop.store(true);
|
||||
break;
|
||||
}
|
||||
usleep(S2US);
|
||||
second++;
|
||||
}
|
||||
|
||||
if (rte_eal_wait_lcore(core_id) < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");
|
||||
|
||||
|
||||
uint32_t qps = 0;
|
||||
// dump stats
|
||||
for (auto it : options.s_data) {
|
||||
if (it->valid) {
|
||||
qps++;
|
||||
log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
|
||||
<< it->clt_hw_rx << ',' << it->clt_hw_tx << ','
|
||||
<< it->srv_sw_rx << ',' << it->srv_sw_tx << ','
|
||||
<< it->srv_hw_rx << ',' << it->srv_hw_tx << std::endl;
|
||||
}
|
||||
}
|
||||
log_file.close();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Processed %d packets in %d seconds, QPS: %d\n", qps, options.run_time, qps);
|
||||
|
||||
// clean up
|
||||
rte_eth_dev_stop(portid);
|
||||
rte_eth_dev_close(portid);
|
||||
|
||||
return 0;
|
||||
}
|
13 compile_flags.txt Normal file
@@ -0,0 +1,13 @@
-xc++
-O2
-std=c++11
-Wall
-Wextra
-Werror
-I/usr/include/dpdk
-Iinc
-Wno-deprecated-declarations
-Wno-packed-not-aligned
-Wno-address-of-packed-member
-Wno-zero-length-array
-Wno-gnu-zero-variadic-macro-arguments
61 inc/defs.hh
@@ -1,61 +0,0 @@
#pragma once

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <cstdio>
#include <sys/types.h>
#include <sys/cpuset.h>

#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \
    TypeName(const TypeName &) = delete; \
    void operator=(const TypeName &) = delete

#define UNUSED __attribute__((unused))

constexpr static unsigned long S2NS = 1000000000UL;
constexpr static unsigned long S2US = 1000000UL;
constexpr static unsigned long MS2NS = 1000000UL;

constexpr static int NEXT_CPU_NULL = -1;


#if defined(__x86_64__)
static inline int
cmask_get_next_cpu(uint64_t *mask)
{
    int ffs = ffsll(*mask);
    *mask &= ~(1ul << (ffs - 1));
    return ffs - 1;
}

static inline int
cmask_get_num_cpus(const uint64_t mask)
{
    return __builtin_popcount(mask);
}
#endif

static inline uint64_t
get_uptime()
{
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    return (tp.tv_sec * S2NS + tp.tv_nsec);
}

static inline void
cpulist_to_cpuset(char * cpulist, cpuset_t * cpuset)
{
    char * cpu = strtok(cpulist, ",");
    CPU_ZERO(cpuset);

    while (cpu != nullptr) {
        CPU_SET(atoi(cpu), cpuset);
        cpu = strtok(nullptr, ",");
    }
}

#define ATTR_UNUSED __attribute__((unused))
234 inc/gen.h Normal file
@@ -0,0 +1,234 @@
|
||||
// modified from mutilate
|
||||
// -*- c++ -*-
|
||||
|
||||
// 1. implement "fixed" generator
|
||||
// 2. implement discrete generator
|
||||
// 3. implement combine generator?
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define D(fmt, ...)
|
||||
#define DIE(fmt, ...) (void)0;
|
||||
|
||||
#define FNV_64_PRIME (0x100000001b3ULL)
|
||||
#define FNV1_64_INIT (0xcbf29ce484222325ULL)
|
||||
static inline uint64_t fnv_64_buf(const void* buf, size_t len) {
|
||||
uint64_t hval = FNV1_64_INIT;
|
||||
|
||||
unsigned char *bp = (unsigned char *)buf; /* start of buffer */
|
||||
unsigned char *be = bp + len; /* beyond end of buffer */
|
||||
|
||||
while (bp < be) {
|
||||
hval ^= (uint64_t)*bp++;
|
||||
hval *= FNV_64_PRIME;
|
||||
}
|
||||
|
||||
return hval;
|
||||
}
|
||||
|
||||
static inline uint64_t fnv_64(uint64_t in) { return fnv_64_buf(&in, sizeof(in)); }
|
||||
|
||||
|
||||
// Generator syntax:
|
||||
//
|
||||
// \d+ == fixed
|
||||
// n[ormal]:mean,sd
|
||||
// e[xponential]:lambda
|
||||
// p[areto]:scale,shape
|
||||
// g[ev]:loc,scale,shape
|
||||
// fb_value, fb_key, fb_rate
|
||||
|
||||
class Generator {
|
||||
public:
|
||||
Generator() {}
|
||||
// Generator(const Generator &g) = delete;
|
||||
// virtual Generator& operator=(const Generator &g) = delete;
|
||||
virtual ~Generator() {}
|
||||
|
||||
virtual double generate(double U = -1.0) = 0;
|
||||
virtual void set_lambda(double) {DIE("set_lambda() not implemented");}
|
||||
protected:
|
||||
std::string type;
|
||||
};
|
||||
|
||||
class Fixed : public Generator {
|
||||
public:
|
||||
Fixed(double _value = 1.0) : value(_value) { D("Fixed(%f)", value); }
|
||||
virtual double generate(double) { return value; }
|
||||
virtual void set_lambda(double lambda) {
|
||||
if (lambda > 0.0) value = 1.0 / lambda;
|
||||
else value = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double value;
|
||||
};
|
||||
|
||||
class Uniform : public Generator {
|
||||
public:
|
||||
Uniform(double _scale) : scale(_scale) { D("Uniform(%f)", scale); }
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
if (U < 0.0) U = drand48();
|
||||
return scale * U;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda) {
|
||||
if (lambda > 0.0) scale = 2.0 / lambda;
|
||||
else scale = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double scale;
|
||||
};
|
||||
|
||||
class Normal : public Generator {
|
||||
public:
|
||||
Normal(double _mean = 1.0, double _sd = 1.0) : mean(_mean), sd(_sd) {
|
||||
D("Normal(mean=%f, sd=%f)", mean, sd);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
if (U < 0.0) U = drand48();
|
||||
double V = U; // drand48();
|
||||
double N = sqrt(-2 * log(U)) * cos(2 * M_PI * V);
|
||||
return mean + sd * N;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda) {
|
||||
if (lambda > 0.0) mean = 1.0 / lambda;
|
||||
else mean = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double mean, sd;
|
||||
};
|
||||
|
||||
class Exponential : public Generator {
|
||||
public:
|
||||
Exponential(double _lambda = 1.0) : lambda(_lambda) {
|
||||
D("Exponential(lambda=%f)", lambda);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
if (lambda <= 0.0) return 0.0;
|
||||
if (U < 0.0) U = drand48();
|
||||
return -log(U) / lambda;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda) { this->lambda = lambda; }
|
||||
|
||||
private:
|
||||
double lambda;
|
||||
};
|
||||
|
||||
class GPareto : public Generator {
|
||||
public:
|
||||
GPareto(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0) :
|
||||
loc(_loc), scale(_scale), shape(_shape) {
|
||||
assert(shape != 0.0);
|
||||
D("GPareto(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
if (U < 0.0) U = drand48();
|
||||
return loc + scale * (pow(U, -shape) - 1) / shape;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda) {
|
||||
if (lambda <= 0.0) scale = 0.0;
|
||||
else scale = (1 - shape) / lambda - (1 - shape) * loc;
|
||||
}
|
||||
|
||||
private:
|
||||
double loc /* mu */;
|
||||
double scale /* sigma */, shape /* k */;
|
||||
};
|
||||
|
||||
class GEV : public Generator {
|
||||
public:
|
||||
GEV(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0) :
|
||||
e(1.0), loc(_loc), scale(_scale), shape(_shape) {
|
||||
assert(shape != 0.0);
|
||||
D("GEV(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
return loc + scale * (pow(e.generate(U), -shape) - 1) / shape;
|
||||
}
|
||||
|
||||
private:
|
||||
Exponential e;
|
||||
double loc /* mu */, scale /* sigma */, shape /* k */;
|
||||
};
|
||||
|
||||
class Discrete : public Generator {
|
||||
public:
|
||||
~Discrete() { delete def; }
|
||||
Discrete(Generator* _def = NULL) : def(_def) {
|
||||
if (def == NULL) def = new Fixed(0.0);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0) {
|
||||
double Uc = U;
|
||||
if (pv.size() > 0 && U < 0.0) U = drand48();
|
||||
|
||||
double sum = 0;
|
||||
|
||||
for (auto p: pv) {
|
||||
sum += p.first;
|
||||
if (U < sum) return p.second;
|
||||
}
|
||||
|
||||
return def->generate(Uc);
|
||||
}
|
||||
|
||||
void add(double p, double v) {
|
||||
pv.push_back(std::pair<double,double>(p, v));
|
||||
}
|
||||
|
||||
private:
|
||||
Generator *def;
|
||||
std::vector< std::pair<double,double> > pv;
|
||||
};
|
||||
|
||||
class KeyGenerator {
|
||||
public:
|
||||
KeyGenerator(Generator* _g, double _max = 10000) : g(_g), max(_max) {}
|
||||
std::string generate(uint64_t ind) {
|
||||
uint64_t h = fnv_64(ind);
|
||||
double U = (double) h / (double)ULLONG_MAX;
|
||||
double G = g->generate(U);
|
||||
int keylen = MAX(round(G), floor(log10(max)) + 1);
|
||||
char key[256];
|
||||
snprintf(key, 256, "%0*" PRIu64, keylen, ind);
|
||||
|
||||
// D("%d = %s", ind, key);
|
||||
return std::string(key);
|
||||
}
|
||||
private:
|
||||
Generator* g;
|
||||
double max;
|
||||
};
|
||||
|
||||
Generator* createGenerator(std::string str);
|
||||
Generator* createFacebookKey();
|
||||
Generator* createFacebookValue();
|
||||
Generator* createFacebookIA();
|
346 inc/gen.hh
@@ -1,346 +0,0 @@
|
||||
// modified from mutilate
|
||||
// -*- c++ -*-
|
||||
|
||||
// 1. implement "fixed" generator
|
||||
// 2. implement discrete generator
|
||||
// 3. implement combine generator?
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/_pthreadtypes.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include "defs.hh"
|
||||
|
||||
#define D(fmt, ...)
|
||||
#define DIE(fmt, ...) (void)0;
|
||||
|
||||
#define FNV_64_PRIME (0x100000001b3ULL)
|
||||
#define FNV1_64_INIT (0xcbf29ce484222325ULL)
|
||||
static inline uint64_t
|
||||
fnv_64_buf(const void *buf, size_t len)
|
||||
{
|
||||
uint64_t hval = FNV1_64_INIT;
|
||||
|
||||
unsigned char *bp = (unsigned char *)buf; /* start of buffer */
|
||||
unsigned char *be = bp + len; /* beyond end of buffer */
|
||||
|
||||
while (bp < be) {
|
||||
hval ^= (uint64_t)*bp++;
|
||||
hval *= FNV_64_PRIME;
|
||||
}
|
||||
|
||||
return hval;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
fnv_64(uint64_t in)
|
||||
{
|
||||
return fnv_64_buf(&in, sizeof(in));
|
||||
}
|
||||
|
||||
// Generator syntax:
|
||||
//
|
||||
// \d+ == fixed
|
||||
// n[ormal]:mean,sd
|
||||
// e[xponential]:lambda
|
||||
// p[areto]:scale,shape
|
||||
// g[ev]:loc,scale,shape
|
||||
// fb_value, fb_key, fb_rate
|
||||
|
||||
class Generator {
|
||||
public:
|
||||
Generator() { }
|
||||
// Generator(const Generator &g) = delete;
|
||||
// virtual Generator& operator=(const Generator &g) = delete;
|
||||
virtual ~Generator() { }
|
||||
|
||||
virtual double generate(double U = -1.0) = 0;
|
||||
virtual void set_lambda(double) { DIE("set_lambda() not implemented"); }
|
||||
|
||||
protected:
|
||||
std::string type;
|
||||
};
|
||||
|
||||
class Fixed : public Generator {
|
||||
public:
|
||||
Fixed(double _value = 1.0)
|
||||
: value(_value)
|
||||
{
|
||||
D("Fixed(%f)", value);
|
||||
}
|
||||
virtual double generate(double) { return value; }
|
||||
virtual void set_lambda(double lambda)
|
||||
{
|
||||
if (lambda > 0.0)
|
||||
value = 1.0 / lambda;
|
||||
else
|
||||
value = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double value;
|
||||
};
|
||||
|
||||
class Uniform : public Generator {
|
||||
public:
|
||||
Uniform(double _scale)
|
||||
: scale(_scale)
|
||||
{
|
||||
D("Uniform(%f)", scale);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
if (U < 0.0)
|
||||
U = drand48();
|
||||
return scale * U;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda)
|
||||
{
|
||||
if (lambda > 0.0)
|
||||
scale = 2.0 / lambda;
|
||||
else
|
||||
scale = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double scale;
|
||||
};
|
||||
|
||||
class Normal : public Generator {
|
||||
public:
|
||||
Normal(double _mean = 1.0, double _sd = 1.0)
|
||||
: mean(_mean)
|
||||
, sd(_sd)
|
||||
{
|
||||
D("Normal(mean=%f, sd=%f)", mean, sd);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
if (U < 0.0)
|
||||
U = drand48();
|
||||
double V = U; // drand48();
|
||||
double N = sqrt(-2 * log(U)) * cos(2 * M_PI * V);
|
||||
return mean + sd * N;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda)
|
||||
{
|
||||
if (lambda > 0.0)
|
||||
mean = 1.0 / lambda;
|
||||
else
|
||||
mean = 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double mean, sd;
|
||||
};
|
||||
|
||||
class Exponential : public Generator {
|
||||
public:
|
||||
Exponential(double _lambda = 1.0)
|
||||
: lambda(_lambda)
|
||||
{
|
||||
D("Exponential(lambda=%f)", lambda);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
if (lambda <= 0.0)
|
||||
return 0.0;
|
||||
if (U < 0.0)
|
||||
U = drand48();
|
||||
return -log(U) / lambda;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda) { this->lambda = lambda; }
|
||||
|
||||
private:
|
||||
double lambda;
|
||||
};
|
||||
|
||||
class GPareto : public Generator {
|
||||
public:
|
||||
GPareto(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0)
|
||||
: loc(_loc)
|
||||
, scale(_scale)
|
||||
, shape(_shape)
|
||||
{
|
||||
assert(shape != 0.0);
|
||||
D("GPareto(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
if (U < 0.0)
|
||||
U = drand48();
|
||||
return loc + scale * (pow(U, -shape) - 1) / shape;
|
||||
}
|
||||
|
||||
virtual void set_lambda(double lambda)
|
||||
{
|
||||
if (lambda <= 0.0)
|
||||
scale = 0.0;
|
||||
else
|
||||
scale = (1 - shape) / lambda - (1 - shape) * loc;
|
||||
}
|
||||
|
||||
private:
|
||||
double loc /* mu */;
|
||||
double scale /* sigma */, shape /* k */;
|
||||
};
|
||||
|
||||
class GEV : public Generator {
|
||||
public:
|
||||
GEV(double _loc = 0.0, double _scale = 1.0, double _shape = 1.0)
|
||||
: e(1.0)
|
||||
, loc(_loc)
|
||||
, scale(_scale)
|
||||
, shape(_shape)
|
||||
{
|
||||
assert(shape != 0.0);
|
||||
D("GEV(loc=%f, scale=%f, shape=%f)", loc, scale, shape);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
return loc + scale * (pow(e.generate(U), -shape) - 1) / shape;
|
||||
}
|
||||
|
||||
private:
|
||||
Exponential e;
|
||||
double loc /* mu */, scale /* sigma */, shape /* k */;
|
||||
};
|
||||
|
||||
class Discrete : public Generator {
|
||||
public:
|
||||
~Discrete() { delete def; }
|
||||
Discrete(Generator *_def = NULL)
|
||||
: def(_def)
|
||||
{
|
||||
if (def == NULL)
|
||||
def = new Fixed(0.0);
|
||||
}
|
||||
|
||||
virtual double generate(double U = -1.0)
|
||||
{
|
||||
double Uc = U;
|
||||
if (pv.size() > 0 && U < 0.0)
|
||||
U = drand48();
|
||||
|
||||
double sum = 0;
|
||||
|
||||
for (auto p : pv) {
|
||||
sum += p.first;
|
||||
if (U < sum)
|
||||
return p.second;
|
||||
}
|
||||
|
||||
return def->generate(Uc);
|
||||
}
|
||||
|
||||
void add(double p, double v)
|
||||
{
|
||||
pv.push_back(std::pair<double, double>(p, v));
|
||||
}
|
||||
|
||||
private:
|
||||
Generator *def;
|
||||
std::vector<std::pair<double, double>> pv;
|
||||
};
|
||||
|
||||
class KeyGenerator {
|
||||
public:
|
||||
KeyGenerator(Generator *_g, double _max = 10000)
|
||||
: g(_g)
|
||||
, max(_max)
|
||||
{
|
||||
}
|
||||
std::string generate(uint64_t ind)
|
||||
{
|
||||
uint64_t h = fnv_64(ind);
|
||||
double U = (double)h / (double)ULLONG_MAX;
|
||||
double G = g->generate(U);
|
||||
int keylen = MAX(round(G), floor(log10(max)) + 1);
|
||||
char key[256];
|
||||
snprintf(key, 256, "%0*" PRIu64, keylen, ind);
|
||||
|
||||
// D("%d = %s", ind, key);
|
||||
return std::string(key);
|
||||
}
|
||||
|
||||
private:
|
||||
Generator *g;
|
||||
double max;
|
||||
};
|
||||
|
||||
Generator *createGenerator(std::string str);
|
||||
Generator *createFacebookKey();
|
||||
Generator *createFacebookValue();
|
||||
Generator *createFacebookIA();
|
||||
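A short sketch of how the factories above compose with KeyGenerator (it assumes createFacebookKey() returns a heap-allocated generator that the caller owns; the record index and max are illustrative):

    Generator *klen = createFacebookKey();     // key-length distribution
    KeyGenerator keygen(klen, 100000);         // keys zero-padded for up to 100000 records
    std::string key = keygen.generate(42);     // deterministic key for record index 42
    delete klen;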
|
||||
// memload generator
|
||||
class memload_generator {
|
||||
public:
|
||||
struct memload_generator_options {
|
||||
size_t transaction_size {4096};
|
||||
size_t buffer_size {64*1024*1024};
|
||||
char ia_dist[64]{"fixed"};
|
||||
int verbose {0};
|
||||
uint64_t trans_per_second;
|
||||
bool shared_buffer {true};
|
||||
};
|
||||
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(memload_generator);
|
||||
struct thread_info {
|
||||
pthread_t pthr;
|
||||
void *from_buffer;
|
||||
void *to_buffer;
|
||||
std::atomic<bool> reset_ts;
|
||||
int tid;
|
||||
int pull;
|
||||
int coreid;
|
||||
int target_dom;
|
||||
struct memload_generator_options * opts;
|
||||
Generator * ia_gen;
|
||||
|
||||
// stat keeping
|
||||
std::atomic<uint32_t> num_trans;
|
||||
std::atomic<int> * state;
|
||||
std::atomic<int> init_status;
|
||||
};
|
||||
|
||||
std::vector<struct thread_info *> thr_infos;
|
||||
std::atomic<int> state;
|
||||
static constexpr int STATE_RUN = 0;
|
||||
static constexpr int STATE_RDY = 1;
|
||||
static constexpr int STATE_END = 2;
|
||||
static constexpr int STATE_INIT = 3;
|
||||
|
||||
static void *worker_thrd(void *_tinfo);
|
||||
struct memload_generator_options opts;
|
||||
|
||||
public:
|
||||
memload_generator(cpuset_t * threads, cpuset_t * modes, cpuset_t * target_domain, struct memload_generator_options * opt, bool *success);
|
||||
uint64_t get_transactions();
|
||||
bool start();
|
||||
bool stop();
|
||||
bool set_transactions(uint64_t tps);
|
||||
~memload_generator();
|
||||
};
|
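A usage sketch for the class above (assumptions: cpuset_t is manipulated with CPU_ZERO/CPU_SET from <sys/cpuset.h>, the `modes` set selects a per-worker mode as hinted by thread_info::pull, and the core and domain numbers are invented):

    memload_generator::memload_generator_options opts;
    opts.trans_per_second = 100000;

    cpuset_t threads, modes, domain;
    CPU_ZERO(&threads); CPU_SET(2, &threads);   // one worker pinned to core 2
    CPU_ZERO(&modes);                           // default mode for every worker
    CPU_ZERO(&domain);  CPU_SET(1, &domain);    // memory traffic targets NUMA domain 1

    bool ok = false;
    memload_generator mgen(&threads, &modes, &domain, &opts, &ok);
    if (ok && mgen.start()) {
        /* ... measurement window ... */
        mgen.stop();
        printf("completed %lu transactions\n", mgen.get_transactions());
    }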
@ -1,133 +0,0 @@
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
#include "rte_ethdev.h"
|
||||
#include "rte_ether.h"
|
||||
|
||||
#define MAX_NUMA_NODES (64)
|
||||
|
||||
struct device_conf {
|
||||
int portid;
|
||||
uint16_t tx_ring_sz;
|
||||
uint16_t rx_ring_sz;
|
||||
cpuset_t core_affinity;
|
||||
int mtu;
|
||||
uint64_t rx_offloads;
|
||||
uint64_t tx_offloads;
|
||||
uint64_t rss_hf;
|
||||
|
||||
rte_tx_callback_fn tx_fn;
|
||||
void * tx_user;
|
||||
|
||||
rte_rx_callback_fn rx_fn;
|
||||
void * rx_user;
|
||||
|
||||
bool timesync;
|
||||
};
|
||||
|
||||
struct mem_conf {
|
||||
int num_elements;
|
||||
int cache_size;
|
||||
int data_room_size;
|
||||
int priv_size;
|
||||
unsigned int max_pools;
|
||||
};
|
||||
|
||||
constexpr static uint16_t MIN_RANDOM_PORT = 1000;
|
||||
constexpr static uint16_t DEFAULT_RAT_PORT = 1234;
|
||||
constexpr static unsigned int INIT_DELAY = 3;
|
||||
constexpr static unsigned int MAX_NODES = 64;
|
||||
|
||||
void
|
||||
dpdk_init(struct device_conf *dconf, struct mem_conf *mconf);
|
||||
|
||||
void
|
||||
dpdk_cleanup(struct device_conf *dconf);
|
||||
|
||||
struct rte_mempool *
|
||||
mempool_get(int nodeid);
|
||||
|
||||
struct port_conf {
|
||||
const char * driver_name;
|
||||
uint64_t rxoffload;
|
||||
uint64_t txoffload;
|
||||
uint64_t rss_hf;
|
||||
bool timesync;
|
||||
};
|
||||
|
||||
int
|
||||
portconf_get(int portid, struct port_conf * out);
|
||||
|
||||
|
||||
// constexpr static int LATENCY_MEASURE_TIMES = 10000;
|
||||
|
||||
// static inline void
|
||||
// sync_port_clock(uint16_t portid)
|
||||
//{
|
||||
// int64_t lat = 0;
|
||||
// int64_t get_time_lat;
|
||||
// int64_t write_time_lat;
|
||||
// struct timespec dum;
|
||||
// struct timespec start;
|
||||
// struct timespec end;
|
||||
//
|
||||
// // measure clock_gettime latency
|
||||
// for(int i = 0; i < LATENCY_MEASURE_TIMES; i++) {
|
||||
// // end - start ~= 2x clock_gettime's latency
|
||||
// clock_gettime(CLOCK_REALTIME, &start);
|
||||
// clock_gettime(CLOCK_REALTIME, &dum);
|
||||
// clock_gettime(CLOCK_REALTIME, &end);
|
||||
//
|
||||
// if (end.tv_sec != start.tv_sec) {
|
||||
// rte_exit(EXIT_FAILURE, "clock_gettime too slow\n");
|
||||
// }
|
||||
//
|
||||
// // shouldn't overflow
|
||||
// lat += (end.tv_nsec - start.tv_nsec) / 2;
|
||||
// }
|
||||
// get_time_lat = lat / LATENCY_MEASURE_TIMES;
|
||||
//
|
||||
// // measure rte_eth_timesync_write_time latency
|
||||
// lat = 0;
|
||||
// for(int i = 0; i < LATENCY_MEASURE_TIMES; i++) {
|
||||
// // end - start ~= rte_eth_timesync latency + clock_gettime's latency
|
||||
// clock_gettime(CLOCK_REALTIME, &dum);
|
||||
// clock_gettime(CLOCK_REALTIME, &start);
|
||||
// if (rte_eth_timesync_write_time(portid, &dum) != 0) {
|
||||
// rte_exit(EXIT_FAILURE, "failed to write time\n");
|
||||
// }
|
||||
// clock_gettime(CLOCK_REALTIME, &end);
|
||||
//
|
||||
// if (end.tv_sec != start.tv_sec) {
|
||||
// rte_exit(EXIT_FAILURE, "clock_gettime too slow!\n");
|
||||
// }
|
||||
//
|
||||
// // shouldn't overflow
|
||||
// int64_t elat = (end.tv_nsec - start.tv_nsec) - get_time_lat;
|
||||
// if (elat < 0) {
|
||||
// rte_exit(EXIT_FAILURE, "something is wrong with lat \n");
|
||||
// }
|
||||
// lat += elat;
|
||||
// }
|
||||
// write_time_lat = lat / LATENCY_MEASURE_TIMES;
|
||||
//
|
||||
// int64_t delta = (get_time_lat + write_time_lat) / 2;
|
||||
// int64_t s2ns = (int64_t)S2NS;
|
||||
// // sync the clock
|
||||
// while (true) {
|
||||
// clock_gettime(CLOCK_REALTIME, &dum);
|
||||
// dum.tv_nsec += delta;
|
||||
// if (dum.tv_nsec > s2ns) {
|
||||
// // try again if overflow
|
||||
// continue;
|
||||
// }
|
||||
// if (rte_eth_timesync_write_time(portid, &dum) != 0) {
|
||||
// rte_exit(EXIT_FAILURE, "failed to write time\n");
|
||||
// }
|
||||
// break;
|
||||
// }
|
||||
// rte_eth_timesync_enable(portid);
|
||||
//
|
||||
// printf("Sync-ed time: get lat %ld write lat %ld\n", get_time_lat,
|
||||
// write_time_lat);
|
||||
//}
|
490
inc/net/pkt.hh
@ -1,490 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <sys/endian.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_flow.h>
|
||||
#include <rte_ip.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <rte_mbuf_core.h>
|
||||
#include <rte_net.h>
|
||||
#include <rte_udp.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "defs.hh"
|
||||
|
||||
#include <random>
|
||||
|
||||
#define IP_DEFTTL 64 /* from RFC 1340. */
|
||||
#define IP_VERSION 0x40
|
||||
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
|
||||
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
|
||||
#define IP_ADDR_FMT_SIZE 15
|
||||
|
||||
constexpr static uint32_t MAX_JUMBO_MTU = 9000;
|
||||
constexpr static uint32_t MAX_STANDARD_MTU = 1500;
|
||||
|
||||
static inline int
|
||||
mtu_to_pkt_size(int mtu)
|
||||
{
|
||||
return mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
|
||||
}
|
||||
|
||||
static inline void
|
||||
tx_burst_all(int portid, int txqid, struct rte_mbuf ** tx_bufs, int sz)
|
||||
{
|
||||
int remaining = sz;
|
||||
while(remaining > 0) {
|
||||
remaining -= rte_eth_tx_burst(
|
||||
portid, txqid, &tx_bufs[sz - remaining],
|
||||
remaining);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr static uint32_t ETHER_FRAME_MAGIC = 0xDCDCE5E5;
|
||||
const static struct rte_ether_addr POU_MAC {
|
||||
0x01, 0x00, 0x5e, 0x00, 0x01, 0x81
|
||||
};
|
||||
const static uint32_t POU_IP = RTE_IPV4(224, 0, 1, 129);
|
||||
const static uint16_t POU_PORT = 320;
|
||||
/* Khat Protocol:
|
||||
* khat only processes two kinds of packets - LOAD and PROBE
|
||||
* rat:
|
||||
* rat -> LOAD -> khat
|
||||
* khat -> LOAD_RESP -> rat
|
||||
* cat:
|
||||
* cat -> PROBE -> khat (cat tx timestamps)
|
||||
* khat -> PROBE_RESP -> cat (cat rx timestamps and khat tx/rx timestamps)
* khat -> STAT -> cat (khat sends its tx/rx timestamps)
|
||||
*/
|
||||
|
||||
/* Rat Protocol:
|
||||
* cat & rat:
|
||||
* 1. both launch with full parameters
|
||||
* rat with slave flag
|
||||
* cat with master flag
|
||||
* 2. rats create threads and wait for cat's signal
|
||||
* 3. cat creates threads
|
||||
* 4. cat -> rats SYNC
|
||||
* 5. rats -> cat SYNC_ACK and start running
|
||||
* 6. cat start running after received all SYNC_ACKs
|
||||
* 7. cat stops running, cat -> rats FIN
|
||||
* 8. rats stops running, rats -> cat FIN_ACK with QPS
|
||||
* 9. cat exits after receiving all FIN_ACKs and flushing stats
|
||||
*/
|
||||
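The cat-side control flow implied by the steps above, sketched with the PKT_TYPE_* constants declared further down in this header (send_ctl_pkt(), wait_for_acks() and collect_qps() are hypothetical helpers used only to keep the sketch short):

    for (auto &rat : rats)
        send_ctl_pkt(rat, PKT_TYPE_SYNC);              // step 4
    wait_for_acks(PKT_TYPE_SYNC_ACK, rats.size());     // step 6: start measuring

    /* ... steady-state load and probes ... */

    for (auto &rat : rats)
        send_ctl_pkt(rat, PKT_TYPE_FIN);               // step 7
    auto reports = collect_qps(PKT_TYPE_FIN_ACK, rats.size()); // steps 8-9: per-rat pkt_payload_qps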
|
||||
struct ptp_hdr {
|
||||
uint8_t ptp_msg_type;
|
||||
uint8_t ptp_ver;
|
||||
uint8_t unused[34];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct pkt_hdr {
|
||||
struct rte_ether_hdr eth_hdr;
|
||||
struct rte_ipv4_hdr ipv4_hdr;
|
||||
struct rte_udp_hdr udp_hdr;
|
||||
struct ptp_hdr ptp_hdr;
|
||||
uint16_t type;
|
||||
uint32_t magic;
|
||||
char payload[0];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct net_spec {
|
||||
uint32_t ip;
|
||||
rte_ether_addr mac_addr;
|
||||
};
|
||||
|
||||
static inline void
|
||||
pkt_hdr_to_netspec(struct pkt_hdr *pkt, struct net_spec *src,
|
||||
uint16_t *src_port, struct net_spec *dst, uint16_t *dst_port)
|
||||
{
|
||||
if (src != nullptr) {
|
||||
rte_ether_addr_copy(&pkt->eth_hdr.src_addr, &src->mac_addr);
|
||||
src->ip = rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr);
|
||||
}
|
||||
|
||||
if (src_port != nullptr) {
|
||||
*src_port = rte_be_to_cpu_16(pkt->udp_hdr.src_port);
|
||||
}
|
||||
|
||||
if (dst != nullptr) {
|
||||
rte_ether_addr_copy(&pkt->eth_hdr.dst_addr, &dst->mac_addr);
|
||||
dst->ip = rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr);
|
||||
}
|
||||
|
||||
if (dst_port != nullptr) {
|
||||
*dst_port = rte_be_to_cpu_16(pkt->udp_hdr.dst_port);
|
||||
}
|
||||
};
|
||||
|
||||
struct conn_spec {
|
||||
struct net_spec *src;
|
||||
uint16_t src_port;
|
||||
struct net_spec *dst;
|
||||
uint16_t dst_port;
|
||||
};
|
||||
|
||||
// returns 0 on success
|
||||
static inline int
|
||||
str_to_netspec(char *str, struct net_spec *out)
|
||||
{
|
||||
const char *tok = "@";
|
||||
char *token;
|
||||
char *ptr;
|
||||
uint32_t a, b, c, d;
|
||||
|
||||
token = strtok_r(str, tok, &ptr);
|
||||
|
||||
if (token == nullptr ||
|
||||
sscanf(token, "%d.%d.%d.%d", &a, &b, &c, &d) != 4) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
out->ip = RTE_IPV4(a, b, c, d);
|
||||
|
||||
// mac next
|
||||
token = strtok_r(nullptr, tok, &ptr);
|
||||
if (token == nullptr ||
|
||||
rte_ether_unformat_addr(token, &out->mac_addr) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_LOAD = 0;
|
||||
constexpr static uint32_t LOAD_TYPE_CPU = 0; // arg0 = cpu time in us. arg1 = unused
|
||||
constexpr static uint32_t LOAD_TYPE_MEM = 1; // arg0 = which thread to access. arg1 = how many cachelines to access
|
||||
constexpr static uint32_t LOAD_TYPE_MAX = LOAD_TYPE_MEM + 1;
|
||||
struct pkt_payload_load {
|
||||
uint32_t epoch;
|
||||
uint32_t type; // type of load
|
||||
uint32_t arg0;
|
||||
uint32_t arg1;
|
||||
};
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_PROBE = 1;
|
||||
constexpr static uint16_t PKT_TYPE_LOAD_RESP = 2;
|
||||
constexpr static uint16_t PKT_TYPE_PROBE_RESP = 3;
|
||||
struct pkt_payload_epoch {
|
||||
uint32_t epoch;
|
||||
};
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_STAT = 4;
|
||||
struct pkt_payload_stat {
|
||||
uint32_t epoch;
|
||||
uint64_t hw_rx;
|
||||
uint64_t hw_tx;
|
||||
uint64_t sw_rx;
|
||||
uint64_t sw_tx;
|
||||
};
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_SYNC = 5;
|
||||
constexpr static uint16_t PKT_TYPE_SYNC_ACK = 6;
|
||||
constexpr static uint16_t PKT_TYPE_FIN = 7;
|
||||
constexpr static uint16_t PKT_TYPE_FIN_ACK = 8;
|
||||
struct pkt_payload_qps {
|
||||
uint32_t qps;
|
||||
uint32_t recved_pkts;
|
||||
uint32_t lost_pkts;
|
||||
};
|
||||
|
||||
constexpr static uint16_t NUM_PKT_TYPES = PKT_TYPE_FIN_ACK + 1;
|
||||
// for fast packet verification
|
||||
static const uint32_t expected_payload_size[NUM_PKT_TYPES] {
|
||||
sizeof(struct pkt_payload_load), // LOAD
|
||||
sizeof(struct pkt_payload_epoch), // PROBE
|
||||
sizeof(struct pkt_payload_epoch), // LOAD_RESP
|
||||
sizeof(struct pkt_payload_epoch), // PROBE_RESP
|
||||
sizeof(struct pkt_payload_stat), // STAT
|
||||
0, // SYNC
|
||||
0, // SYNC_ACK
|
||||
0, // FIN
|
||||
sizeof(struct pkt_payload_qps) // FIN_ACK
|
||||
};
|
||||
|
||||
class rdport_generator {
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(rdport_generator);
|
||||
constexpr static uint32_t MAX_PORT = 65535;
|
||||
uint32_t min_port;
|
||||
uint32_t cur;
|
||||
std::random_device rd;
|
||||
std::default_random_engine gen;
|
||||
std::uniform_int_distribution<uint32_t> dist;
|
||||
|
||||
public:
|
||||
rdport_generator(uint32_t mport)
|
||||
: min_port(mport)
|
||||
, cur(0)
|
||||
, dist(0, MAX_PORT - min_port)
|
||||
{
|
||||
gen.seed(get_uptime());
|
||||
cur = dist(gen);
|
||||
}
|
||||
uint16_t next()
|
||||
{
|
||||
uint16_t ret = ((cur) % (MAX_PORT - min_port)) + min_port;
|
||||
cur++;
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
#define NTR_PKT(dep, level, pkt, prefix_fmt, ...) \
|
||||
ntr(dep, level, \
|
||||
prefix_fmt \
|
||||
"src: %d.%d.%d.%d:%d@%02x:%02x:%02x:%02x:%02x:%02x dst: %d.%d.%d.%d:%d@%02x:%02x:%02x:%02x:%02x:%02x type: %d\n", \
|
||||
##__VA_ARGS__, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 24) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 16) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 8) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.src_addr) >> 0) & 0xff, \
|
||||
rte_be_to_cpu_16(pkt->udp_hdr.src_port), \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[0], \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[1], \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[2], \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[3], \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[4], \
|
||||
pkt->eth_hdr.src_addr.addr_bytes[5], \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 24) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 16) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 8) & 0xff, \
|
||||
(rte_be_to_cpu_32(pkt->ipv4_hdr.dst_addr) >> 0) & 0xff, \
|
||||
rte_be_to_cpu_16(pkt->udp_hdr.dst_port), \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[0], \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[1], \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[2], \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[3], \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[4], \
|
||||
pkt->eth_hdr.dst_addr.addr_bytes[5], rte_be_to_cpu_16(pkt->type))
|
||||
|
||||
static inline void
|
||||
print_mac(struct rte_ether_addr *mac)
|
||||
{
|
||||
printf("%x:%x:%x:%x:%x:%x", mac->addr_bytes[0], mac->addr_bytes[1],
|
||||
mac->addr_bytes[2], mac->addr_bytes[3], mac->addr_bytes[4],
|
||||
mac->addr_bytes[5]);
|
||||
}
|
||||
|
||||
static inline void
|
||||
print_ipv4(uint32_t ip)
|
||||
{
|
||||
printf("%d.%d.%d.%d", (ip >> 24) & 0xff, (ip >> 16) & 0xff,
|
||||
(ip >> 8) & 0xff, (ip >> 0) & 0xff);
|
||||
}
|
||||
|
||||
static inline void
|
||||
dump_pkt(struct rte_mbuf *pkt)
|
||||
{
|
||||
if (rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct rte_ether_hdr _eth_hdr;
|
||||
auto eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_read(
|
||||
pkt, 0, sizeof(struct rte_ether_hdr), &_eth_hdr);
|
||||
if (eth_hdr == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ethernet frame
|
||||
printf(
|
||||
"Packet %p: Length 0x%x\n", (void *)pkt, rte_pktmbuf_data_len(pkt));
|
||||
printf(" Ethernet header:\n");
|
||||
printf(" Src:");
|
||||
print_mac(ð_hdr->src_addr);
|
||||
printf("\n");
|
||||
printf(" Dst:");
|
||||
print_mac(ð_hdr->dst_addr);
|
||||
printf("\n");
|
||||
printf(" Type: 0x%x\n", rte_be_to_cpu_16(eth_hdr->ether_type));
|
||||
|
||||
uint16_t ether_type = rte_be_to_cpu_16(eth_hdr->ether_type);
|
||||
if (ether_type != RTE_ETHER_TYPE_IPV4) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (rte_pktmbuf_data_len(pkt) <
|
||||
sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// dump ip header
|
||||
auto ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
|
||||
printf(" IPv4 header:\n");
|
||||
printf(" Src:");
|
||||
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->src_addr));
|
||||
printf("\n");
|
||||
printf(" Dst:");
|
||||
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->dst_addr));
|
||||
printf("\n");
|
||||
printf(" Protocol: 0x%x\n", ipv4_hdr->next_proto_id);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_l2ts_pkt(uint16_t type)
|
||||
{
|
||||
return type == PKT_TYPE_PROBE || type == PKT_TYPE_PROBE_RESP;
|
||||
}
|
||||
|
||||
// fills the packet with the information except for the payload itself
|
||||
static inline struct pkt_hdr *
|
||||
construct_pkt_hdr(
|
||||
struct rte_mbuf *buf, uint16_t type, const struct conn_spec *conn, int pkt_pad_sz)
|
||||
{
|
||||
rte_pktmbuf_reset(buf);
|
||||
|
||||
int total_sz = sizeof(struct pkt_hdr) +
|
||||
expected_payload_size[type];
|
||||
|
||||
if (pkt_pad_sz > total_sz) {
|
||||
total_sz = pkt_pad_sz;
|
||||
}
|
||||
|
||||
auto pkt_data = (struct pkt_hdr *)rte_pktmbuf_append(buf, total_sz);
|
||||
if (pkt_data == nullptr)
|
||||
return nullptr;
|
||||
|
||||
struct rte_ether_hdr *eth_hdr;
|
||||
struct rte_ipv4_hdr *ipv4_hdr;
|
||||
struct rte_udp_hdr *udp_hdr;
|
||||
bool is_ts_pkt = is_l2ts_pkt(type);
|
||||
|
||||
// single segment
|
||||
buf->nb_segs = 1;
|
||||
|
||||
// construct l2 header
|
||||
eth_hdr = &pkt_data->eth_hdr;
|
||||
rte_ether_addr_copy(&conn->src->mac_addr, ð_hdr->src_addr);
|
||||
if (is_ts_pkt) {
|
||||
rte_ether_addr_copy(&POU_MAC, ð_hdr->dst_addr);
|
||||
} else {
|
||||
rte_ether_addr_copy(&conn->dst->mac_addr, ð_hdr->dst_addr);
|
||||
}
|
||||
eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
|
||||
buf->l2_len = sizeof(struct rte_ether_hdr);
|
||||
|
||||
// construct l3 header
|
||||
ipv4_hdr = &pkt_data->ipv4_hdr;
|
||||
memset(ipv4_hdr, 0, sizeof(struct rte_ipv4_hdr));
|
||||
ipv4_hdr->version_ihl = IP_VHL_DEF;
|
||||
ipv4_hdr->type_of_service = 0;
|
||||
ipv4_hdr->fragment_offset = 0;
|
||||
ipv4_hdr->time_to_live = IP_DEFTTL;
|
||||
ipv4_hdr->next_proto_id = IPPROTO_UDP;
|
||||
ipv4_hdr->packet_id = 0;
|
||||
ipv4_hdr->src_addr = rte_cpu_to_be_32(conn->src->ip);
|
||||
if (is_ts_pkt) {
|
||||
ipv4_hdr->dst_addr = rte_cpu_to_be_32(POU_IP);
|
||||
} else {
|
||||
ipv4_hdr->dst_addr = rte_cpu_to_be_32(conn->dst->ip);
|
||||
}
|
||||
ipv4_hdr->total_length = rte_cpu_to_be_16(total_sz - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr));
|
||||
ipv4_hdr->hdr_checksum = 0;
|
||||
buf->l3_len = sizeof(struct rte_ipv4_hdr);
|
||||
|
||||
// construct l4 header
|
||||
udp_hdr = &pkt_data->udp_hdr;
|
||||
udp_hdr->src_port = rte_cpu_to_be_16(conn->src_port);
|
||||
if (is_ts_pkt) {
|
||||
udp_hdr->dst_port = rte_cpu_to_be_16(POU_PORT);
|
||||
} else {
|
||||
udp_hdr->dst_port = rte_cpu_to_be_16(conn->dst_port);
|
||||
}
|
||||
udp_hdr->dgram_cksum = 0; /* No UDP checksum. */
|
||||
udp_hdr->dgram_len = total_sz - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr) - sizeof(struct rte_udp_hdr);
|
||||
buf->l4_len = sizeof(struct rte_udp_hdr);
|
||||
buf->ol_flags |= RTE_MBUF_F_TX_IPV4;
|
||||
buf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
|
||||
buf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
|
||||
|
||||
if (is_ts_pkt) {
|
||||
// set misc flags
|
||||
buf->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST;
|
||||
pkt_data->ptp_hdr.ptp_ver = 0x2; // VER 2
|
||||
pkt_data->ptp_hdr.ptp_msg_type = 0x0; // SYNC
|
||||
} else {
|
||||
pkt_data->ptp_hdr.ptp_ver = 0xff; // invalid ver
|
||||
}
|
||||
|
||||
pkt_data->type = rte_cpu_to_be_16(type);
|
||||
pkt_data->magic = rte_cpu_to_be_32(ETHER_FRAME_MAGIC);
|
||||
|
||||
return pkt_data;
|
||||
}
|
||||
|
||||
// returns 0 on success
|
||||
static inline int
|
||||
alloc_pkt_hdr(struct rte_mempool *pool, uint16_t type,
|
||||
const struct conn_spec *conn, int pkt_pad_sz, struct rte_mbuf **mbuf_out,
|
||||
struct pkt_hdr **hdr_out)
|
||||
{
|
||||
struct pkt_hdr *hdr;
|
||||
struct rte_mbuf *pkt = rte_pktmbuf_alloc(pool);
|
||||
if (pkt == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// printf("alloc_pkt_hdr:\n");
|
||||
// printf("from ");
|
||||
// print_mac(&conn->src->mac_addr);
|
||||
// printf("\nto ");
|
||||
// print_mac(&conn->dst->mac_addr);
|
||||
// printf("\n");
|
||||
|
||||
hdr = construct_pkt_hdr(pkt, type, conn, pkt_pad_sz);
|
||||
if (hdr == nullptr) {
|
||||
rte_pktmbuf_free(pkt);
|
||||
return -1;
|
||||
}
|
||||
|
||||
*mbuf_out = pkt;
|
||||
*hdr_out = hdr;
|
||||
return 0;
|
||||
}
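A send-path sketch tying these helpers to the LOAD exchange described at the top of this header (mbuf_pool, portid, txqid and cur_epoch are placeholders owned by the caller):

    struct net_spec src, dst;      // filled via str_to_netspec() / rte_eth_macaddr_get()
    struct conn_spec conn;
    conn.src = &src; conn.src_port = 1234;
    conn.dst = &dst; conn.dst_port = 1234;

    struct rte_mbuf *m;
    struct pkt_hdr *hdr;
    if (alloc_pkt_hdr(mbuf_pool, PKT_TYPE_LOAD, &conn, 0, &m, &hdr) != 0)
        rte_exit(EXIT_FAILURE, "failed to allocate LOAD packet\n");

    auto *load = (struct pkt_payload_load *)hdr->payload;
    load->epoch = rte_cpu_to_be_32(cur_epoch);
    load->type = rte_cpu_to_be_32(LOAD_TYPE_CPU);
    load->arg0 = rte_cpu_to_be_32(100);    // 100 us of cpu work per request
    load->arg1 = 0;

    tx_burst_all(portid, txqid, &m, 1);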
|
||||
|
||||
static inline struct pkt_hdr *
|
||||
check_valid_packet(struct rte_mbuf *pkt, const struct rte_ether_addr *host_mac)
|
||||
{
|
||||
struct pkt_hdr *pkt_data = nullptr;
|
||||
const struct rte_ether_addr *expected_mac = nullptr;
|
||||
uint16_t type;
|
||||
const uint32_t data_len = rte_pktmbuf_data_len(pkt);
|
||||
|
||||
if (data_len < sizeof(struct pkt_hdr)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
pkt_data = rte_pktmbuf_mtod(pkt, struct pkt_hdr *);
|
||||
|
||||
// check MAGIC
|
||||
if (rte_be_to_cpu_32(pkt_data->magic) != ETHER_FRAME_MAGIC) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
type = rte_be_to_cpu_16(pkt_data->type);
|
||||
// check type and payload size
|
||||
if ((type >= NUM_PKT_TYPES) ||
|
||||
(data_len <
|
||||
(sizeof(struct pkt_hdr) +
|
||||
expected_payload_size[rte_be_to_cpu_16(pkt_data->type)]))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// strict dest mac filter
|
||||
if (host_mac != nullptr) {
|
||||
if (is_l2ts_pkt(type)) {
|
||||
// dst mac must be the PTP multicast addr (POU_MAC)
|
||||
expected_mac = &POU_MAC;
|
||||
} else {
|
||||
// dst mac must match the host mac
|
||||
expected_mac = host_mac;
|
||||
}
|
||||
|
||||
if (!rte_is_same_ether_addr(
|
||||
expected_mac, &pkt_data->eth_hdr.dst_addr))
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return pkt_data;
|
||||
}
|
16
inc/nm.h
Normal file
@ -0,0 +1,16 @@
#pragma once

#include <vector>

constexpr static int NM_LEVEL_NUMA = 0;
constexpr static int NM_LEVEL_CPU = 1;
constexpr static int NM_LEVEL_CORE = 2;


std::vector<struct nm_obj *> * nm_get_nodes();
std::vector<struct nm_obj *> * nm_get_cpus();
std::vector<struct nm_obj *> * nm_get_cores();

// 0 on success
// -1 on error
int nm_init();
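A brief usage sketch (struct nm_obj is opaque in this header, so the example only inspects the size of the returned vector):

    if (nm_init() != 0)
        return -1;                                      // topology discovery failed
    std::vector<struct nm_obj *> *cpus = nm_get_cpus();
    printf("%zu cpus discovered\n", cpus->size());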
26
inc/nms.h
@ -1,26 +0,0 @@
#pragma once

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

int
nms_init(int verbose);

void *
nms_malloc(int nodeid, size_t sz);

void *
nms_alloc_static(int nodeid, size_t sz);

void
nms_free_static(void * buf, size_t sz);

void
nms_free(int nodeid, void * addr);

#ifdef __cplusplus
}
#endif // __cplusplus
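A sketch of the intended allocate/free pairing (node ids and sizes are illustrative; pairing nms_alloc_static() with nms_free_static() and nms_malloc() with nms_free() is an assumption based on the names):

    if (nms_init(0) != 0)
        return -1;

    void *buf = nms_malloc(1, 4096);             // 4 KiB backed by NUMA node 1
    /* ... use buf ... */
    nms_free(1, buf);

    void *tbl = nms_alloc_static(0, 1 << 20);    // long-lived 1 MiB region on node 0
    /* ... */
    nms_free_static(tbl, 1 << 20);               // static allocations are freed with their size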
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#define NTR_LEVEL_NONE (0)
|
||||
#define NTR_LEVEL_ERROR (1)
|
||||
@ -20,16 +20,15 @@
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void ntr_init();
|
||||
|
||||
__attribute__((format(printf, 3, 4))) void ntr(
|
||||
int dep, int level, const char *fmt, ...);
|
||||
void ntr(int dep, int level, const char * fmt, ...);
|
||||
|
||||
void ntr_set_level(int dep, int level);
|
||||
|
||||
void ntr_set_output(FILE *f);
|
||||
void ntr_set_output(FILE * f);
|
||||
|
||||
int ntr_get_level(int dep);
|
||||
|
||||
|
190
inc/pkt.h
Normal file
@ -0,0 +1,190 @@
|
||||
#pragma once
|
||||
|
||||
#include <rte_mbuf_core.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <rte_udp.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_ip.h>
|
||||
#include <stdint.h>
|
||||
#include <rte_flow.h>
|
||||
#include <rte_ether.h>
|
||||
#include <unistd.h>
|
||||
#include <rte_net.h>
|
||||
#include <rte_vxlan.h>
|
||||
|
||||
constexpr static uint32_t ETHER_FRAME_MAGIC = 0xDCDCE5E5;
|
||||
const static struct rte_ether_addr PROBE_MAC_ADDR {0x01,0x1B,0x19,0x00,0x00,0x00};
|
||||
const static uint16_t ETHER_TYPE_LOCAL_EXP = 0x88b5;
|
||||
|
||||
struct ptp_hdr {
|
||||
uint8_t ptp_msg_type;
|
||||
uint8_t ptp_ver;
|
||||
uint8_t unused[34];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct pkt_hdr {
|
||||
struct rte_ether_hdr eth_hdr;
|
||||
struct ptp_hdr ptp_hdr;
|
||||
uint16_t type;
|
||||
uint32_t magic;
|
||||
char payload[0];
|
||||
} __attribute__((packed));
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_LOAD = 0;
|
||||
constexpr static uint16_t PKT_TYPE_PROBE = 1;
|
||||
constexpr static uint16_t PKT_TYPE_LOAD_RESP = 2;
|
||||
constexpr static uint16_t PKT_TYPE_PROBE_RESP = 3;
|
||||
struct pkt_payload_epoch {
|
||||
uint32_t epoch;
|
||||
};
|
||||
|
||||
constexpr static uint16_t PKT_TYPE_STAT = 4;
|
||||
struct pkt_payload_stat {
|
||||
uint32_t epoch;
|
||||
uint64_t hw_rx;
|
||||
uint64_t hw_tx;
|
||||
uint64_t sw_rx;
|
||||
uint64_t sw_tx;
|
||||
};
|
||||
|
||||
constexpr static uint16_t NUM_PKT_TYPES = PKT_TYPE_STAT + 1;
|
||||
// for fast packet verification
|
||||
static const uint32_t expected_payload_size[NUM_PKT_TYPES] {
|
||||
sizeof(struct pkt_payload_epoch), // LOAD
|
||||
sizeof(struct pkt_payload_epoch), // PROBE
|
||||
sizeof(struct pkt_payload_epoch), // LOAD_RESP
|
||||
sizeof(struct pkt_payload_epoch), // PROBE_RESP
|
||||
sizeof(struct pkt_payload_stat) //STAT
|
||||
};
|
||||
|
||||
static inline void
|
||||
print_mac(struct rte_ether_addr * mac)
|
||||
{
|
||||
printf("%x:%x:%x:%x:%x:%x", mac->addr_bytes[0],
|
||||
mac->addr_bytes[1],
|
||||
mac->addr_bytes[2],
|
||||
mac->addr_bytes[3],
|
||||
mac->addr_bytes[4],
|
||||
mac->addr_bytes[5]);
|
||||
}
|
||||
|
||||
static inline void
|
||||
print_ipv4(uint32_t ip)
|
||||
{
|
||||
printf("%d-%d-%d-%d", (ip >> 24) & 0xff,
|
||||
(ip >> 16) & 0xff,
|
||||
(ip >> 8) & 0xff,
|
||||
(ip >> 0) & 0xff);
|
||||
}
|
||||
|
||||
static inline void
|
||||
dump_pkt(struct rte_mbuf *pkt)
|
||||
{
|
||||
if(rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct rte_ether_hdr _eth_hdr;
|
||||
struct rte_ether_hdr * eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_read(pkt, 0, sizeof(struct rte_ether_hdr), &_eth_hdr);
|
||||
if (eth_hdr == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ethernet frame
|
||||
printf("Packet %p: Length 0x%x\n", (void*)pkt, rte_pktmbuf_data_len(pkt));
|
||||
printf(" Ethernet header:\n");
|
||||
printf(" Src:");
|
||||
print_mac(ð_hdr->s_addr);
|
||||
printf("\n");
|
||||
printf(" Dst:");
|
||||
print_mac(ð_hdr->d_addr);
|
||||
printf("\n");
|
||||
printf(" Type: 0x%x\n", rte_be_to_cpu_16(eth_hdr->ether_type));
|
||||
|
||||
uint16_t ether_type = rte_be_to_cpu_16(eth_hdr->ether_type);
|
||||
if (ether_type != RTE_ETHER_TYPE_IPV4) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(rte_pktmbuf_data_len(pkt) < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// dump ip header
|
||||
struct rte_ipv4_hdr * ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
|
||||
printf(" IPv4 header:\n");
|
||||
printf(" Src:");
|
||||
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->src_addr));
|
||||
printf("\n");
|
||||
printf(" Dst:");
|
||||
print_ipv4(rte_be_to_cpu_32(ipv4_hdr->dst_addr));
|
||||
printf("\n");
|
||||
printf(" Protocol: 0x%x\n", ipv4_hdr->next_proto_id);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// fills the packet with the information except for the payload itself
|
||||
static inline
|
||||
struct pkt_hdr * construct_pkt_hdr(struct rte_mbuf * buf, uint16_t type,
|
||||
struct rte_ether_addr * src_mac, struct rte_ether_addr * dst_mac)
|
||||
{
|
||||
rte_pktmbuf_reset(buf);
|
||||
|
||||
const uint32_t total_sz = sizeof(struct pkt_hdr) + expected_payload_size[type];
|
||||
struct pkt_hdr * pkt_data = (struct pkt_hdr *)rte_pktmbuf_append(buf, total_sz);
|
||||
struct rte_ether_hdr * eth_hdr;
|
||||
|
||||
if (pkt_data == NULL)
|
||||
return NULL;
|
||||
|
||||
// single segment
|
||||
buf->nb_segs = 1;
|
||||
|
||||
// construct l2 header
|
||||
eth_hdr = &pkt_data->eth_hdr;
|
||||
rte_ether_addr_copy(src_mac, ð_hdr->s_addr);
|
||||
if (type == PKT_TYPE_PROBE || type == PKT_TYPE_PROBE_RESP) {
|
||||
rte_ether_addr_copy(&PROBE_MAC_ADDR, ð_hdr->d_addr);
|
||||
eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_1588);
|
||||
pkt_data->ptp_hdr.ptp_ver = 0x2; // VER 2
|
||||
buf->ol_flags |= PKT_TX_IEEE1588_TMST;
|
||||
} else {
|
||||
rte_ether_addr_copy(dst_mac, ð_hdr->d_addr);
|
||||
eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_LOCAL_EXP);
|
||||
pkt_data->ptp_hdr.ptp_ver = 0xff;
|
||||
}
|
||||
buf->l2_len = sizeof(struct rte_ether_hdr);
|
||||
|
||||
pkt_data->ptp_hdr.ptp_msg_type = 0x0; // SYNC
|
||||
pkt_data->type = rte_cpu_to_be_16(type);
|
||||
pkt_data->magic = rte_cpu_to_be_32(ETHER_FRAME_MAGIC);
|
||||
|
||||
return pkt_data;
|
||||
}
|
||||
|
||||
static inline
|
||||
struct pkt_hdr * check_valid_packet(struct rte_mbuf * pkt)
|
||||
{
|
||||
struct pkt_hdr * pkt_data = NULL;
|
||||
const uint32_t data_len = rte_pktmbuf_data_len(pkt);
|
||||
|
||||
if (data_len < sizeof(struct pkt_hdr)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pkt_data = rte_pktmbuf_mtod(pkt, struct pkt_hdr *);
|
||||
|
||||
// check MAGIC
|
||||
if (rte_be_to_cpu_32(pkt_data->magic) != ETHER_FRAME_MAGIC) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// check type and payload size
|
||||
if ((rte_be_to_cpu_16(pkt_data->type) < NUM_PKT_TYPES) &&
|
||||
(data_len >= (sizeof(struct pkt_hdr) + expected_payload_size[rte_be_to_cpu_16(pkt_data->type)]))) {
|
||||
return pkt_data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
@ -1,56 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "storage/drivers/driver.hh"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/bdev_zone.h"
|
||||
#include "spdk/thread.h"
|
||||
|
||||
class birb_bdev_driver : public birb_driver
|
||||
{
|
||||
public:
|
||||
birb_bdev_driver(const char * dev_name);
|
||||
~birb_bdev_driver() override;
|
||||
size_t get_capacity() override;
|
||||
birb_driver_status get_status() override;
|
||||
struct spdk_bdev * get_bdev();
|
||||
struct spdk_bdev_desc * get_bdev_desc();
|
||||
birb_driver_type get_type() override;
|
||||
size_t get_align() override;
|
||||
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_bdev_driver);
|
||||
struct spdk_bdev_desc * bdev_desc;
|
||||
struct spdk_bdev * bdev;
|
||||
size_t block_sz;
|
||||
size_t block_num;
|
||||
birb_driver_status status;
|
||||
|
||||
static void print_all_bdev();
|
||||
static void bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev * bdev,
|
||||
void * event_ctx);
|
||||
};
|
||||
|
||||
|
||||
class birb_bdev_thread_context : public birb_driver_thread_context
|
||||
{
|
||||
public:
|
||||
birb_bdev_thread_context(birb_bdev_driver * driver);
|
||||
~birb_bdev_thread_context() override;
|
||||
int read(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
|
||||
int write(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
|
||||
void poll() override;
|
||||
birb_driver::birb_driver_status get_status() override;
|
||||
|
||||
private:
|
||||
struct cb_context {
|
||||
callback cb;
|
||||
void * ctx;
|
||||
};
|
||||
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_bdev_thread_context);
|
||||
spdk_io_channel * io_channel;
|
||||
birb_driver::birb_driver_status status;
|
||||
birb_bdev_driver * driver;
|
||||
|
||||
static void io_callback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
|
||||
};
|
@ -1,47 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "defs.hh"
|
||||
|
||||
#include "spdk/thread.h"
|
||||
#include <cstdlib>
|
||||
|
||||
class birb_driver
|
||||
{
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_driver);
|
||||
|
||||
public:
|
||||
enum birb_driver_status{
|
||||
BIRB_SUCCESS,
|
||||
BIRB_FAIL
|
||||
};
|
||||
enum birb_driver_type{
|
||||
BIRB_DRV_NVME,
|
||||
BIRB_DRV_BDEV
|
||||
};
|
||||
virtual size_t get_capacity() = 0;
|
||||
virtual birb_driver_status get_status() = 0;
|
||||
virtual size_t get_align() = 0;
|
||||
virtual birb_driver_type get_type() = 0;
|
||||
virtual ~birb_driver() = default;
|
||||
protected:
|
||||
birb_driver() = default;
|
||||
};
|
||||
|
||||
|
||||
class birb_driver_thread_context
|
||||
{
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_driver_thread_context);
|
||||
|
||||
public:
|
||||
using callback = void (*)(bool, void *);
|
||||
virtual int read(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
|
||||
virtual int write(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
|
||||
virtual void poll() = 0;
|
||||
virtual birb_driver::birb_driver_status get_status() = 0;
|
||||
virtual ~birb_driver_thread_context() = default;
|
||||
protected:
|
||||
birb_driver_thread_context() = default;
|
||||
};
|
||||
|
@ -1,65 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "storage/drivers/driver.hh"
|
||||
#include "spdk/nvme.h"
|
||||
#include "spdk/thread.h"
|
||||
|
||||
class birb_nvme_driver : public birb_driver
|
||||
{
|
||||
public:
|
||||
birb_nvme_driver(const char * dev_name);
|
||||
~birb_nvme_driver() override;
|
||||
size_t get_capacity() override;
|
||||
birb_driver_status get_status() override;
|
||||
birb_driver_type get_type() override;
|
||||
size_t get_align() override;
|
||||
|
||||
spdk_nvme_ctrlr * get_ctrlr();
|
||||
spdk_nvme_ns * get_ns();
|
||||
spdk_nvme_io_qpair_opts * get_io_qpair_opts();
|
||||
|
||||
private:
|
||||
struct attach_context {
|
||||
spdk_nvme_ctrlr ** ctrlr;
|
||||
spdk_nvme_ns ** ns;
|
||||
const char * dev_name;
|
||||
int valid;
|
||||
};
|
||||
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_nvme_driver);
|
||||
birb_driver_status status;
|
||||
spdk_nvme_ctrlr * ctrlr;
|
||||
spdk_nvme_ns * ns;
|
||||
spdk_nvme_io_qpair_opts opts;
|
||||
|
||||
static bool probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr_opts *opts);
|
||||
static void attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
||||
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts);
|
||||
};
|
||||
|
||||
|
||||
class birb_nvme_thread_context : public birb_driver_thread_context
|
||||
{
|
||||
public:
|
||||
birb_nvme_thread_context(birb_nvme_driver * driver);
|
||||
~birb_nvme_thread_context() override;
|
||||
int read(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
|
||||
int write(size_t offset, size_t size, char * buffer, callback callback, void * context) override;
|
||||
void poll() override;
|
||||
birb_driver::birb_driver_status get_status() override;
|
||||
|
||||
private:
|
||||
struct cb_context {
|
||||
callback cb;
|
||||
void * ctx;
|
||||
};
|
||||
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_nvme_thread_context);
|
||||
birb_driver::birb_driver_status status;
|
||||
birb_nvme_driver * driver;
|
||||
struct spdk_nvme_qpair * qpair;
|
||||
|
||||
static void io_callback(void *arg, const struct spdk_nvme_cpl *completion);
|
||||
static uint32_t size_to_lba(size_t size, int lba_size);
|
||||
static uint64_t addr_to_lba(size_t addr, int lba_size);
|
||||
};
|
@ -1,47 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "defs.hh"
|
||||
|
||||
#include "spdk/thread.h"
|
||||
#include <cstdlib>
|
||||
|
||||
class birb_driver
|
||||
{
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_driver);
|
||||
|
||||
public:
|
||||
enum birb_driver_status{
|
||||
BIRB_SUCCESS,
|
||||
BIRB_FAIL
|
||||
};
|
||||
enum birb_driver_type{
|
||||
BIRB_DRV_NVME,
|
||||
BIRB_DRV_BDEV
|
||||
};
|
||||
virtual size_t get_capacity() = 0;
|
||||
virtual birb_driver_status get_status() = 0;
|
||||
virtual size_t get_align() = 0;
|
||||
virtual birb_driver_type get_type() = 0;
|
||||
virtual ~birb_driver() = default;
|
||||
protected:
|
||||
birb_driver() = default;
|
||||
};
|
||||
|
||||
|
||||
class birb_driver_thread_context
|
||||
{
|
||||
private:
|
||||
DISALLOW_EVIL_CONSTRUCTORS(birb_driver_thread_context);
|
||||
|
||||
public:
|
||||
using callback = void (*)(bool, void *);
|
||||
virtual int read(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
|
||||
virtual int write(size_t offset, size_t size, char * buffer, callback callback, void * context) = 0;
|
||||
virtual void poll() = 0;
|
||||
virtual birb_driver::birb_driver_status get_status() = 0;
|
||||
virtual ~birb_driver_thread_context() = default;
|
||||
protected:
|
||||
birb_driver_thread_context() = default;
|
||||
};
|
||||
|
@ -1,53 +0,0 @@
|
||||
#pragma once
|
||||
#include <sys/endian.h>
|
||||
#include <sys/types.h>
|
||||
#include "defs.hh"
|
||||
#include "gen.hh"
|
||||
#include <random>
|
||||
|
||||
enum io_generator_opcode {
|
||||
IOGEN_READ,
|
||||
IOGEN_WRITE
|
||||
};
|
||||
|
||||
enum io_generator_address_mode {
|
||||
IOGEN_ADDR_MONOTONIC_INCREASING,
|
||||
IOGEN_ADDR_UNIFORM_RANDOM
|
||||
};
|
||||
|
||||
struct io_generator_ctx {
|
||||
unsigned long size;
|
||||
uint64_t offset;
|
||||
io_generator_opcode op;
|
||||
};
|
||||
|
||||
//
|
||||
// cur_offset is aligned to req_size
|
||||
//
|
||||
class io_generator {
|
||||
public:
|
||||
int issue(struct io_generator_ctx * ctx, char * buf);
|
||||
io_generator(unsigned long req_size,
|
||||
unsigned long capacity,
|
||||
unsigned int read_pct,
|
||||
io_generator_address_mode addr_mode);
|
||||
io_generator() = delete;
|
||||
|
||||
private:
|
||||
unsigned long cur_offset;
|
||||
|
||||
const unsigned long capacity;
|
||||
const unsigned long req_size;
|
||||
const unsigned int read_pct;
|
||||
const io_generator_address_mode addr_mode;
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 rng;
|
||||
std::uniform_int_distribution<int> dist;
|
||||
|
||||
std::random_device addr_rd;
|
||||
std::mt19937 addr_rng;
|
||||
std::uniform_int_distribution<uint64_t> addr_dist;
|
||||
|
||||
DISALLOW_EVIL_CONSTRUCTORS(io_generator);
|
||||
};
|
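A sketch of the issue loop this class is built for (it assumes issue() returns 0 on success and fills the ctx fields above; thread_ctx and on_io_done stand in for a birb_driver_thread_context and its completion callback, and capacity is the device size):

    io_generator gen(4096, capacity, 80, IOGEN_ADDR_UNIFORM_RANDOM);  // 4 KiB requests, 80% reads
    char *buf = (char *)aligned_alloc(4096, 4096);

    struct io_generator_ctx ctx;
    while (gen.issue(&ctx, buf) == 0) {
        if (ctx.op == IOGEN_READ)
            thread_ctx->read(ctx.offset, ctx.size, buf, on_io_done, &ctx);
        else
            thread_ctx->write(ctx.offset, ctx.size, buf, on_io_done, &ctx);
        thread_ctx->poll();
    }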
33
inc/util.h
Normal file
@ -0,0 +1,33 @@
#pragma once
#include <stdint.h>
#include <strings.h>     /* ffsll() */
#include <time.h>
#include <immintrin.h>   /* _mm_popcnt_u64() */
#include <rte_ip.h>

constexpr static unsigned long S2NS = 1000000000UL; /* seconds -> nanoseconds */
constexpr static unsigned long S2US = 1000000UL;    /* seconds -> microseconds */
constexpr static uint16_t SERVER_LOAD_PORT = 1234;
constexpr static uint16_t SERVER_PROBE_PORT = 319;
constexpr static uint32_t SERVER_IP = RTE_IPV4(192, 168, 123, 0);

static inline uint64_t
get_time_us()
{
	struct timespec ts;
	clock_gettime(CLOCK_REALTIME, &ts);
	return ts.tv_sec * S2US + ts.tv_nsec / 1000;
}

constexpr static int NEXT_CPU_NULL = -1;

/* pops the lowest set bit from *mask and returns its index, or NEXT_CPU_NULL if the mask is empty */
static inline int
cmask_get_next_cpu(uint64_t * mask)
{
	int ffs = ffsll(*mask);
	if (ffs == 0)
		return NEXT_CPU_NULL;
	*mask &= ~(1ULL << (ffs - 1));
	return ffs - 1;
}

static inline int
cmask_get_num_cpus(const uint64_t mask)
{
	return _mm_popcnt_u64(mask);
}
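A sketch of how the two helpers combine when spawning one worker per bit of a hex cpumask (this mirrors what khat/khat.cc below does with its -A option):

    uint64_t cpuset = strtoull("0x6", nullptr, 16);    // cores 1 and 2
    int nworkers = cmask_get_num_cpus(cpuset);

    uint64_t remaining = cpuset;
    for (int i = 0; i < nworkers; i++) {
        int core = cmask_get_next_cpu(&remaining);     // pops the lowest set bit
        printf("worker %d pinned to core %d\n", i, core);
    }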
557
khat/khat.cc
Normal file
@ -0,0 +1,557 @@
|
||||
#include <cstdio>
|
||||
#include <cassert>
|
||||
#include <ctime>
|
||||
#include <netinet/in.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_log.h>
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "nm.h"
|
||||
#include "pkt.h"
|
||||
#include "ntr.h"
|
||||
#include "util.h"
|
||||
|
||||
|
||||
/* Protocol:
|
||||
* regular client:
|
||||
* client -> LOAD -> server
|
||||
* server -> LOAD_RESP -> client
|
||||
* measuring client:
|
||||
* client -> PROBE -> server (client tx timestamps)
|
||||
* server -> PROBE_RESP -> client (client rx timestamps and server tx/rx timestamps)
|
||||
* server -> STAT -> client (server sends its tx/rx timestamps)
|
||||
*/
|
||||
|
||||
static void * const PROBE_MAGIC = (void*)0x12344444;
|
||||
constexpr static unsigned int MBUF_MAX_COUNT = 65536;
|
||||
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
|
||||
constexpr static unsigned int RX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int TX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
|
||||
|
||||
static const struct rte_eth_conf port_conf_default{};
|
||||
|
||||
// keep track of the probe state
// when a probe packet first arrives, this state is set to "in flux" and the rte_mbuf's userdata is set to PROBE_MAGIC,
// which prevents other probe packets from being processed
// when the server sends the probe stats back to the user, the in-flux state is released
// this guarantees that the server only processes one probe packet at a time
// XXX: this could also be attached to the mbuf itself and processed by the lcore thread;
// it is kept global because there can only be one pending probe request at any moment,
// and rx_add_timestamp can store its timestamps here too
|
||||
struct probe_state_t {
|
||||
struct rte_ether_hdr hdr;
|
||||
uint32_t epoch;
|
||||
uint32_t timesync;
|
||||
uint64_t last_sw_rx;
|
||||
uint64_t last_sw_tx;
|
||||
uint64_t last_hw_rx;
|
||||
};
|
||||
|
||||
struct thread_info {
|
||||
int tid;
|
||||
int rxqid;
|
||||
int txqid;
|
||||
int lcore_id;
|
||||
};
|
||||
|
||||
// state machine:
|
||||
constexpr static int SERVER_STATE_WAIT = 0;
|
||||
constexpr static int SERVER_STATE_PROBE = 1;
|
||||
|
||||
struct options_t {
|
||||
//config
|
||||
int num_threads{1};
|
||||
uint64_t cpuset{0b010}; //2nd core
|
||||
|
||||
//states
|
||||
uint16_t s_portid;
|
||||
struct rte_ether_addr s_host_mac;
|
||||
struct rte_mempool * s_pkt_mempool;
|
||||
std::atomic<int> s_state {SERVER_STATE_WAIT};
|
||||
struct probe_state_t s_probe_info;
|
||||
std::vector<struct thread_info *> s_thr_info;
|
||||
};
|
||||
|
||||
static struct options_t options;
|
||||
|
||||
static uint16_t
|
||||
rx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = rte_rdtsc();
|
||||
struct timespec ts;
|
||||
struct pkt_hdr * pkt_data;
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i]);
|
||||
|
||||
if (pkt_data == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: ignoring invalid packet %p.\n", (void*)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
|
||||
int state_wait = SERVER_STATE_WAIT;
|
||||
pkts[i]->userdata = nullptr;
|
||||
if (rte_eth_timesync_read_rx_timestamp(port, &ts, pkts[i]->timesync & 0x3) == 0) {
|
||||
if (options.s_state.compare_exchange_strong(state_wait, SERVER_STATE_PROBE)) {
|
||||
// mark the mbuf as probe packet being processed
|
||||
// only the locore that receives the pkt w/ userdata != nullptr processes that packet
|
||||
pkts[i]->userdata = PROBE_MAGIC;
|
||||
// tag with timestamps
|
||||
options.s_probe_info.last_hw_rx = ts.tv_nsec + ts.tv_sec * S2NS;
|
||||
options.s_probe_info.last_sw_rx = now;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: tagged packet %p epoch %d with sw: %llu hw:%llu.\n", (void*)pkts[i], options.s_probe_info.epoch, now, options.s_probe_info.last_hw_rx);
|
||||
} else
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p not tagged - server is processing a probe.\n", (void*)pkts[i]);
|
||||
} else
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "rx_add_timestamp: packet %p not tagged - hw rx timestamp not available.\n", (void*)pkts[i]);
|
||||
} else
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "rx_add_timestamp: packet %p not tagged - type %d.\n", (void*)pkts[i], rte_be_to_cpu_16(pkt_data->type));
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = rte_rdtsc();
|
||||
struct pkt_hdr * pkt_data;
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
|
||||
pkt_data = check_valid_packet(pkts[i]);
|
||||
|
||||
if (pkt_data == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: ignoring invalid packet %p.\n", (void*)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
|
||||
// this packet is the response to PROBE packets
|
||||
|
||||
// at this time the packet is not sent to the NIC yet so
|
||||
// the state must be waiting stats
|
||||
// XXX: this should be an assert
|
||||
if(options.s_state.load() != SERVER_STATE_PROBE || pkts[i]->userdata != PROBE_MAGIC) {
|
||||
rte_exit(EXIT_FAILURE, "packet %p sent to NIC before sw callback\n", (void*)pkts[i]);
|
||||
}
|
||||
|
||||
options.s_probe_info.last_sw_tx = now;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: tagged packet %p with sw tx %llu\n", (void*)pkts[i], options.s_probe_info.last_sw_tx);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "tx_add_timestamp: packet %p not tagged - type %d\n", (void*)pkts[i], pkt_data->type);
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static int
|
||||
locore_main(void * ti)
|
||||
{
|
||||
struct thread_info * tinfo = (struct thread_info *)ti;
|
||||
struct rte_mbuf *bufs[BURST_SIZE];
|
||||
// tx_bufs only needs BURST_SIZE entries here: the extra PKT_TYPE_STAT packet
// (sent once all tx timestamps are ready) is transmitted separately below
|
||||
struct rte_mbuf *tx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
|
||||
bool pending_probe = false;
|
||||
|
||||
if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n", tinfo->tid, options.s_portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: running on locore %d with txidx %d and rxidx %d.\n", tinfo->tid, rte_lcore_id(), tinfo->txqid, tinfo->rxqid);
|
||||
|
||||
while(true) {
|
||||
uint16_t nb_tx = 0;
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, tinfo->rxqid, bufs, BURST_SIZE);
|
||||
struct rte_mbuf * pkt_buf;
|
||||
struct pkt_hdr * tx_data;
|
||||
|
||||
for(int i = 0; i < nb_rx; i++) {
|
||||
// XXX: optimization: in rx_add_timestamp every packet is already validated once
|
||||
// can just mark valid packet with a value so we can avoid this redundant check
|
||||
pkt_data = check_valid_packet(bufs[i]);
|
||||
|
||||
if (pkt_data == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main <thread %d>: skipping invalid packet %p.\n", tinfo->tid, (void*)bufs[i]);
|
||||
//dump_pkt(bufs[i]);
|
||||
rte_pktmbuf_free(bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: packet %p from %x:%x:%x:%x:%x:%x to %x:%x:%x:%x:%x:%x, type %d\n",
|
||||
tinfo->tid,
|
||||
(void*)bufs[i],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[0],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[1],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[2],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[3],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[4],
|
||||
pkt_data->eth_hdr.s_addr.addr_bytes[5],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[0],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[1],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[2],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[3],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[4],
|
||||
pkt_data->eth_hdr.d_addr.addr_bytes[5],
|
||||
rte_be_to_cpu_16(pkt_data->type));
|
||||
|
||||
|
||||
switch (rte_be_to_cpu_16(pkt_data->type)) {
|
||||
case PKT_TYPE_PROBE: {
|
||||
if (options.s_state.load() == SERVER_STATE_PROBE && bufs[i]->userdata == PROBE_MAGIC) {
|
||||
// send back probe_resp pkt to probe for return latency
|
||||
pending_probe = true;
|
||||
|
||||
// book keep probe results
|
||||
options.s_probe_info.epoch = rte_be_to_cpu_32(((struct pkt_payload_epoch *)pkt_data->payload)->epoch);
|
||||
options.s_probe_info.timesync = bufs[i]->timesync;
|
||||
rte_memcpy(&options.s_probe_info.hdr, &pkt_data->eth_hdr, sizeof(struct rte_ether_hdr));
|
||||
|
||||
pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
|
||||
|
||||
if (pkt_buf == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
|
||||
}
|
||||
|
||||
tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_PROBE_RESP,
|
||||
&options.s_host_mac,
|
||||
&pkt_data->eth_hdr.s_addr);
|
||||
|
||||
if (tx_data == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "failed to construct tx packet %p", (void*)pkt_buf);
|
||||
}
|
||||
|
||||
rte_memcpy(tx_data->payload, pkt_data->payload, sizeof(struct pkt_payload_epoch));
|
||||
|
||||
pkt_buf->userdata = PROBE_MAGIC;
|
||||
|
||||
// queue for burst send
|
||||
tx_bufs[nb_tx++] = pkt_buf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PKT_TYPE_LOAD: {
|
||||
// we reply to load packet regardless of the server state
|
||||
pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
|
||||
|
||||
if (pkt_buf == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
|
||||
}
|
||||
|
||||
tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_LOAD_RESP,
|
||||
&options.s_host_mac,
|
||||
&pkt_data->eth_hdr.s_addr);
|
||||
|
||||
if (tx_data == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "failed to construct tx packet %p", (void*)pkt_buf);
|
||||
}
|
||||
|
||||
rte_memcpy(tx_data->payload, pkt_data->payload, sizeof(struct pkt_payload_epoch));
|
||||
|
||||
// queue for burst send
|
||||
tx_bufs[nb_tx++] = pkt_buf;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rte_pktmbuf_free(bufs[i]);
|
||||
}
|
||||
|
||||
// send the packets
|
||||
if (nb_tx > 0) {
|
||||
const uint16_t nb_tx_succ = rte_eth_tx_burst(options.s_portid, tinfo->txqid, tx_bufs, nb_tx);
|
||||
if (nb_tx_succ < nb_tx) {
|
||||
rte_exit(EXIT_FAILURE, "failed to send some packets.\n");
|
||||
}
|
||||
}
|
||||
|
||||
// we wanna check every loop not only when there are packets
|
||||
if (pending_probe) {
|
||||
struct timespec ts;
|
||||
struct pkt_payload_stat * stat;
|
||||
if (rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts) == 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main <thread %d>: obtained hw tx timestamp %lld.\n", tinfo->tid, ts.tv_sec * S2NS + ts.tv_nsec);
|
||||
// now we have everything we need
|
||||
pkt_buf = rte_pktmbuf_alloc(options.s_pkt_mempool);
|
||||
if (pkt_buf == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "failed to allocate memory for pkt_buf\n");
|
||||
}
|
||||
|
||||
tx_data = construct_pkt_hdr(pkt_buf, PKT_TYPE_STAT,
|
||||
&options.s_host_mac,
|
||||
&options.s_probe_info.hdr.s_addr);
|
||||
|
||||
// populate stats
|
||||
stat = (struct pkt_payload_stat *)tx_data->payload;
|
||||
stat->epoch = rte_cpu_to_be_32(options.s_probe_info.epoch);
|
||||
stat->hw_rx = rte_cpu_to_be_64(options.s_probe_info.last_hw_rx);
|
||||
stat->hw_tx = rte_cpu_to_be_64(ts.tv_nsec + ts.tv_sec * S2NS);
|
||||
stat->sw_rx = rte_cpu_to_be_64(options.s_probe_info.last_sw_rx);
|
||||
stat->sw_tx = rte_cpu_to_be_64(options.s_probe_info.last_sw_tx);
|
||||
|
||||
// send the packet
|
||||
if (rte_eth_tx_burst(options.s_portid, 0, &pkt_buf, 1) < 1) {
|
||||
rte_exit(EXIT_FAILURE, "failed to send some packets.\n");
|
||||
}
|
||||
|
||||
// release flux
|
||||
pending_probe = false;
|
||||
|
||||
int expected = SERVER_STATE_PROBE;
|
||||
if (!options.s_state.compare_exchange_strong(expected, SERVER_STATE_WAIT)) {
|
||||
rte_exit(EXIT_FAILURE, "s_state changed unexpectedly!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
|
||||
{
|
||||
struct rte_eth_dev_info dev_info;
|
||||
struct rte_eth_conf port_conf = port_conf_default;
|
||||
struct rte_eth_txconf txconf;
|
||||
struct rte_eth_rxconf rxconf;
|
||||
|
||||
uint16_t nb_rxd = RX_RING_SIZE;
|
||||
uint16_t nb_txd = TX_RING_SIZE;
|
||||
|
||||
if(!rte_eth_dev_is_valid_port(portid)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = rte_eth_dev_info_get(portid, &dev_info);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
|
||||
|
||||
/* Configure the Ethernet device. */
|
||||
ret = rte_eth_dev_configure(portid, options.num_threads, options.num_threads, &port_conf);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Allocate and set up 1 RX queue per thread per Ethernet port. */
|
||||
rxconf = dev_info.default_rxconf;
|
||||
for (int i = 0; i < options.num_threads; i++) {
|
||||
ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
options.s_thr_info.at(i)->rxqid = i;
|
||||
}
|
||||
|
||||
txconf = dev_info.default_txconf;
|
||||
txconf.offloads = port_conf.txmode.offloads;
|
||||
/* Allocate and set up 1 TX queue per thread per Ethernet port. */
|
||||
for (int i = 0; i < options.num_threads; i++) {
|
||||
ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
options.s_thr_info.at(i)->txqid = i;
|
||||
}
|
||||
|
||||
ret = rte_eth_dev_start(portid);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Display the port MAC address. */
|
||||
struct rte_ether_addr addr;
|
||||
ret = rte_eth_macaddr_get(portid, &addr);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = rte_eth_timesync_enable(portid);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Enable RX in promiscuous mode for the Ethernet device. */
|
||||
ret = rte_eth_promiscuous_enable(portid);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
for (int i = 0; i < options.num_threads; i++) {
|
||||
if (rte_eth_add_tx_callback(portid, options.s_thr_info.at(i)->txqid, tx_add_timestamp, NULL) == NULL ||
|
||||
rte_eth_add_rx_callback(portid, options.s_thr_info.at(i)->rxqid, rx_add_timestamp, NULL) == NULL) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
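port_init() above finishes by attaching tx_add_timestamp/rx_add_timestamp to every queue. For reference, a minimal sketch of that callback mechanism using only DPDK (the counter and function names here are illustrative, not part of khat): the callback must match rte_rx_callback_fn and return how many packets it hands on to the application.

#include <atomic>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

// Illustrative per-queue RX callback: counts packets and passes them all on.
static std::atomic<uint64_t> g_rx_seen { 0 };

static uint16_t
count_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
    struct rte_mbuf **pkts __rte_unused, uint16_t nb_pkts,
    uint16_t max_pkts __rte_unused, void *user __rte_unused)
{
	g_rx_seen.fetch_add(nb_pkts, std::memory_order_relaxed);
	return nb_pkts; // number of packets forwarded to the application
}

// registered per queue, as in the loop above:
//   rte_eth_add_rx_callback(portid, qid, count_rx, nullptr);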
|
||||
static void usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n" \
|
||||
" -v(vv): verbose mode\n" \
|
||||
" -h: seek help\n" \
|
||||
" -A: cpu mask for worker threads\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
static void dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: khat configuration:\n" \
|
||||
" verbosity: +%d\n" \
|
||||
" thread count: %d\n" \
|
||||
" thread mask: %lld\n\n",
|
||||
ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_DEFAULT,
|
||||
options.num_threads,
|
||||
options.cpuset);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
unsigned int nb_ports;
|
||||
struct rte_mempool *mbuf_pool;
|
||||
|
||||
ntr_init();
|
||||
|
||||
if (nm_init() != 0) {
|
||||
rte_exit(EXIT_FAILURE, "nm init failed!\n");
|
||||
}
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while((c = getopt(argc, argv, "hvA:")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "\n");
|
||||
break;
|
||||
case 'A':
|
||||
options.cpuset = strtoull(optarg, nullptr, 16);
|
||||
options.num_threads = cmask_get_num_cpus(options.cpuset);
|
||||
if (options.num_threads == 0) {
|
||||
rte_exit(EXIT_FAILURE, "must run at least one thread\n");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_FAILURE, "unknown argument: %c\n", c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
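The '-A' option above hands a hexadecimal CPU mask to cmask_get_num_cpus() and, later, cmask_get_next_cpu(). Those helpers are defined elsewhere in this repository; a plausible sketch of their behaviour (an assumption, not the project's actual implementation) is a popcount plus a lowest-set-bit walk:

#include <cstdint>

// Count the worker threads implied by a cpu mask.
static inline int
cmask_get_num_cpus_sketch(uint64_t mask)
{
	return __builtin_popcountll(mask);
}

// Pop the lowest set bit and return its index. Callers must ensure the mask
// is non-empty, which the '-A' parser above enforces by rejecting a zero count.
static inline int
cmask_get_next_cpu_sketch(uint64_t *mask)
{
	int cpu = __builtin_ctzll(*mask);
	*mask &= *mask - 1;
	return cpu;
}

// e.g. "-A 0xC" yields 2 worker threads pinned to cpus 2 and 3.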
|
||||
dump_options();
|
||||
|
||||
nb_ports = rte_eth_dev_count_avail();
|
||||
if (nb_ports == 0) {
|
||||
rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
|
||||
}
|
||||
|
||||
uint16_t portid = rte_eth_find_next(0);
|
||||
if (portid == RTE_MAX_ETHPORTS) {
|
||||
rte_exit(EXIT_FAILURE, "cannot find an available port\n");
|
||||
}
|
||||
options.s_portid = portid;
|
||||
|
||||
// create a mbuf memory pool on the socket
|
||||
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT * nb_ports, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(portid));
|
||||
if (mbuf_pool == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
|
||||
}
|
||||
|
||||
options.s_pkt_mempool = mbuf_pool;
|
||||
|
||||
// init threads
|
||||
uint64_t cpuset = options.cpuset;
|
||||
for(int i = 0; i < options.num_threads; i++) {
|
||||
struct thread_info * tinfo = new thread_info;
|
||||
tinfo->tid = i;
|
||||
tinfo->lcore_id = cmask_get_next_cpu(&cpuset);
|
||||
options.s_thr_info.push_back(tinfo);
|
||||
}
|
||||
|
||||
if (port_init(portid, mbuf_pool) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
|
||||
}
|
||||
|
||||
if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d on socket %d with mac addr %x:%x:%x:%x:%x:%x\n", portid, rte_eth_dev_socket_id(portid),
|
||||
options.s_host_mac.addr_bytes[0],
|
||||
options.s_host_mac.addr_bytes[1],
|
||||
options.s_host_mac.addr_bytes[2],
|
||||
options.s_host_mac.addr_bytes[3],
|
||||
options.s_host_mac.addr_bytes[4],
|
||||
options.s_host_mac.addr_bytes[5]);
|
||||
|
||||
usleep(S2US);
|
||||
|
||||
for(int i = 0; i < options.num_threads; i++) {
|
||||
struct thread_info * tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: launching thread %d on locore %d\n", tinfo->tid, tinfo->lcore_id);
|
||||
if (rte_eal_remote_launch(locore_main, (void *)options.s_thr_info.at(i), tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to launch function on locore %d\n", tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < options.num_threads; i++) {
|
||||
struct thread_info * tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: waiting for locore %d...\n", tinfo->lcore_id);
|
||||
if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n", tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
// shouldn't get here
|
||||
|
||||
return 0;
|
||||
}
|
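main() above launches one locore_main per worker and then joins each one. The launch/join pattern itself, reduced to a single worker (the lcore id and the function body are placeholders):

#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>

// Any function of type int(void *) can be dispatched to a worker lcore.
static int
lcore_fn(void *arg __rte_unused)
{
	return 0; // per-lcore work goes here
}

static int
launch_and_join(unsigned worker_lcore)
{
	if (rte_eal_remote_launch(lcore_fn, nullptr, worker_lcore) != 0)
		return -1;				 // lcore busy or invalid
	return rte_eal_wait_lcore(worker_lcore); // returns lcore_fn's result
}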
@ -1,95 +1,74 @@
|
||||
// modified from mutilate
|
||||
|
||||
#include "gen.hh"
|
||||
#include "gen.h"
|
||||
|
||||
Generator *
|
||||
createFacebookKey()
|
||||
{
|
||||
return new GEV(30.7984, 8.20449, 0.078688);
|
||||
Generator* createFacebookKey() { return new GEV(30.7984, 8.20449, 0.078688); }
|
||||
|
||||
Generator* createFacebookValue() {
|
||||
Generator* g = new GPareto(15.0, 214.476, 0.348238);
|
||||
|
||||
Discrete* d = new Discrete(g);
|
||||
d->add(0.00536, 0.0);
|
||||
d->add(0.00047, 1.0);
|
||||
d->add(0.17820, 2.0);
|
||||
d->add(0.09239, 3.0);
|
||||
d->add(0.00018, 4.0);
|
||||
d->add(0.02740, 5.0);
|
||||
d->add(0.00065, 6.0);
|
||||
d->add(0.00606, 7.0);
|
||||
d->add(0.00023, 8.0);
|
||||
d->add(0.00837, 9.0);
|
||||
d->add(0.00837, 10.0);
|
||||
d->add(0.08989, 11.0);
|
||||
d->add(0.00092, 12.0);
|
||||
d->add(0.00326, 13.0);
|
||||
d->add(0.01980, 14.0);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
Generator *
|
||||
createFacebookValue()
|
||||
{
|
||||
Generator *g = new GPareto(15.0, 214.476, 0.348238);
|
||||
Generator* createFacebookIA() { return new GPareto(0, 16.0292, 0.154971); }
|
||||
|
||||
Discrete *d = new Discrete(g);
|
||||
d->add(0.00536, 0.0);
|
||||
d->add(0.00047, 1.0);
|
||||
d->add(0.17820, 2.0);
|
||||
d->add(0.09239, 3.0);
|
||||
d->add(0.00018, 4.0);
|
||||
d->add(0.02740, 5.0);
|
||||
d->add(0.00065, 6.0);
|
||||
d->add(0.00606, 7.0);
|
||||
d->add(0.00023, 8.0);
|
||||
d->add(0.00837, 9.0);
|
||||
d->add(0.00837, 10.0);
|
||||
d->add(0.08989, 11.0);
|
||||
d->add(0.00092, 12.0);
|
||||
d->add(0.00326, 13.0);
|
||||
d->add(0.01980, 14.0);
|
||||
Generator* createGenerator(std::string str) {
|
||||
if (!strcmp(str.c_str(), "fb_key")) return createFacebookKey();
|
||||
else if (!strcmp(str.c_str(), "fb_value")) return createFacebookValue();
|
||||
else if (!strcmp(str.c_str(), "fb_ia")) return createFacebookIA();
|
||||
|
||||
return d;
|
||||
}
|
||||
char *s_copy = new char[str.length() + 1];
|
||||
strcpy(s_copy, str.c_str());
|
||||
char *saveptr = NULL;
|
||||
|
||||
Generator *
|
||||
createFacebookIA()
|
||||
{
|
||||
return new GPareto(0, 16.0292, 0.154971);
|
||||
}
|
||||
if (atoi(s_copy) != 0 || !strcmp(s_copy, "0")) {
|
||||
double v = atof(s_copy);
|
||||
delete[] s_copy;
|
||||
return new Fixed(v);
|
||||
}
|
||||
|
||||
Generator *
|
||||
createGenerator(std::string str)
|
||||
{
|
||||
if (!strcmp(str.c_str(), "fb_key"))
|
||||
return createFacebookKey();
|
||||
else if (!strcmp(str.c_str(), "fb_value"))
|
||||
return createFacebookValue();
|
||||
else if (!strcmp(str.c_str(), "fb_ia"))
|
||||
return createFacebookIA();
|
||||
char *t_ptr = strtok_r(s_copy, ":", &saveptr);
|
||||
char *a_ptr = strtok_r(NULL, ":", &saveptr);
|
||||
|
||||
char *s_copy = new char[str.length() + 1];
|
||||
strcpy(s_copy, str.c_str());
|
||||
char *saveptr = NULL;
|
||||
if (t_ptr == NULL) // || a_ptr == NULL)
|
||||
DIE("strtok(.., \":\") failed to parse %s", str.c_str());
|
||||
|
||||
if (atoi(s_copy) != 0 || !strcmp(s_copy, "0")) {
|
||||
double v = atof(s_copy);
|
||||
delete[] s_copy;
|
||||
return new Fixed(v);
|
||||
}
|
||||
saveptr = NULL;
|
||||
char *s1 = strtok_r(a_ptr, ",", &saveptr);
|
||||
char *s2 = strtok_r(NULL, ",", &saveptr);
|
||||
char *s3 = strtok_r(NULL, ",", &saveptr);
|
||||
|
||||
char *t_ptr = strtok_r(s_copy, ":", &saveptr);
|
||||
char *a_ptr = strtok_r(NULL, ":", &saveptr);
|
||||
double a1 = s1 ? atof(s1) : 0.0;
|
||||
double a2 = s2 ? atof(s2) : 0.0;
|
||||
double a3 = s3 ? atof(s3) : 0.0;
|
||||
|
||||
if (t_ptr == NULL) // || a_ptr == NULL)
|
||||
DIE("strtok(.., \":\") failed to parse %s", str.c_str());
|
||||
delete[] s_copy;
|
||||
|
||||
saveptr = NULL;
|
||||
char *s1 = strtok_r(a_ptr, ",", &saveptr);
|
||||
char *s2 = strtok_r(NULL, ",", &saveptr);
|
||||
char *s3 = strtok_r(NULL, ",", &saveptr);
|
||||
if (strcasestr(str.c_str(), "fixed")) return new Fixed(a1);
|
||||
else if (strcasestr(str.c_str(), "normal")) return new Normal(a1, a2);
|
||||
else if (strcasestr(str.c_str(), "exponential")) return new Exponential(a1);
|
||||
else if (strcasestr(str.c_str(), "pareto")) return new GPareto(a1, a2, a3);
|
||||
else if (strcasestr(str.c_str(), "gev")) return new GEV(a1, a2, a3);
|
||||
else if (strcasestr(str.c_str(), "uniform")) return new Uniform(a1);
|
||||
|
||||
double a1 = s1 ? atof(s1) : 0.0;
|
||||
double a2 = s2 ? atof(s2) : 0.0;
|
||||
double a3 = s3 ? atof(s3) : 0.0;
|
||||
DIE("Unable to create Generator '%s'", str.c_str());
|
||||
|
||||
delete[] s_copy;
|
||||
|
||||
if (strcasestr(str.c_str(), "fixed"))
|
||||
return new Fixed(a1);
|
||||
else if (strcasestr(str.c_str(), "normal"))
|
||||
return new Normal(a1, a2);
|
||||
else if (strcasestr(str.c_str(), "exponential"))
|
||||
return new Exponential(a1);
|
||||
else if (strcasestr(str.c_str(), "pareto"))
|
||||
return new GPareto(a1, a2, a3);
|
||||
else if (strcasestr(str.c_str(), "gev"))
|
||||
return new GEV(a1, a2, a3);
|
||||
else if (strcasestr(str.c_str(), "uniform"))
|
||||
return new Uniform(a1);
|
||||
|
||||
DIE("Unable to create Generator '%s'", str.c_str());
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
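Both cat.cc and the memory load generator consume these distributions the same way: build one from a string, scale it to a target rate with set_lambda(), and treat every generate() sample as an inter-arrival gap in seconds. A small usage sketch (the "exponential" string and the 10000 events/s rate are illustrative choices, not values from this file):

#include <cstdint>
#include "gen.hh"

static constexpr uint64_t S2NS_SKETCH = 1000000000ULL;

// Advance a send clock by one generated inter-arrival gap.
static uint64_t
advance_send_clock(Generator *ia, uint64_t next_ts_ns)
{
	return next_ts_ns + (uint64_t)(ia->generate() * (double)S2NS_SKETCH);
}

// usage:
//   Generator *ia = createGenerator("exponential");
//   ia->set_lambda(10000.0);                   // target ~10k events per second
//   next_ts = advance_send_clock(ia, next_ts); // schedule the next event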
@ -1,276 +0,0 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/domainset.h>
|
||||
#include <sys/endian.h>
|
||||
#include <sys/thr.h>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#include <topo.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "nms.h"
|
||||
#include "gen.hh"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
void *
|
||||
memload_generator::worker_thrd(void *_tinfo)
|
||||
{
|
||||
auto *tinfo = (struct thread_info *)_tinfo;
|
||||
void *from_buffer, *to_buffer, *tmp;
|
||||
|
||||
if (tinfo->opts->shared_buffer) {
|
||||
from_buffer = tinfo->from_buffer;
|
||||
to_buffer = tinfo->to_buffer;
|
||||
} else {
|
||||
if (tinfo->opts->verbose) {
|
||||
fprintf(stdout,
|
||||
"memload_generator <thread %d>: allocating fbuf %lu bytes on domain %d...\n",
|
||||
tinfo->tid, tinfo->opts->buffer_size,
|
||||
topo_core_to_numa(tinfo->coreid));
|
||||
}
|
||||
from_buffer = nms_alloc_static(topo_core_to_numa(
|
||||
tinfo->coreid),
|
||||
tinfo->opts->buffer_size);
|
||||
if (tinfo->opts->verbose) {
|
||||
fprintf(stdout,
|
||||
"memload_generator <thread %d>: allocating tbuf %lu bytes on domain %d...\n",
|
||||
tinfo->tid, tinfo->opts->buffer_size, tinfo->target_dom);
|
||||
}
|
||||
to_buffer = nms_alloc_static(tinfo->target_dom,
|
||||
tinfo->opts->buffer_size);
|
||||
}
|
||||
|
||||
if (from_buffer == nullptr || to_buffer == nullptr) {
|
||||
if (tinfo->opts->verbose) {
|
||||
fprintf(stderr,
|
||||
"memload_generator <thread %d>: failed to allocate memory\n",
|
||||
tinfo->tid);
|
||||
}
|
||||
tinfo->init_status.store(-1);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (tinfo->pull) {
|
||||
tmp = from_buffer;
|
||||
from_buffer = to_buffer;
|
||||
to_buffer = tmp;
|
||||
}
|
||||
|
||||
// wait for other threads to init
|
||||
if (tinfo->opts->verbose) {
|
||||
fprintf(stdout, "memload_generator <thread %d, pull %d>: running...\n", tinfo->tid, tinfo->pull);
|
||||
}
|
||||
tinfo->init_status.store(1);
|
||||
|
||||
uint64_t next_ts = topo_uptime_ns();
|
||||
size_t cur_offset = 0;
|
||||
uint64_t cur_ts = 0;
|
||||
while (true) {
|
||||
switch (tinfo->state->load()) {
|
||||
case STATE_RUN:
|
||||
cur_ts = topo_uptime_ns();
|
||||
if (cur_ts >= next_ts) {
|
||||
if (cur_offset + tinfo->opts->transaction_size >
|
||||
tinfo->opts->buffer_size) {
|
||||
cur_offset = 0;
|
||||
}
|
||||
// for (uint i = 0; i < tinfo->opts->transaction_size; i++) {
|
||||
// ((char *)to_buffer)[cur_offset + i] = ((char *)from_buffer)[cur_offset + i];
|
||||
// }
|
||||
memcpy((char *)to_buffer + cur_offset,
|
||||
(char *)from_buffer + cur_offset,
|
||||
tinfo->opts->transaction_size);
|
||||
tinfo->num_trans.fetch_add(1);
|
||||
|
||||
if (tinfo->reset_ts.load(
|
||||
std::memory_order_relaxed)) {
|
||||
tinfo->reset_ts.store(false,
|
||||
std::memory_order_relaxed);
|
||||
next_ts = cur_ts;
|
||||
}
|
||||
next_ts += tinfo->ia_gen->generate() *
|
||||
(double)S2NS;
|
||||
cur_offset += tinfo->opts->transaction_size;
|
||||
}
|
||||
break;
|
||||
case STATE_END:
|
||||
goto end;
|
||||
case STATE_RDY:
|
||||
next_ts = topo_uptime_ns();
|
||||
break;
|
||||
case STATE_INIT:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
end:
|
||||
if (tinfo->opts->verbose) {
|
||||
fprintf(stdout, "memload_generator <thread %d>: exiting...\n",
|
||||
tinfo->tid);
|
||||
}
|
||||
|
||||
if (!tinfo->opts->shared_buffer) {
|
||||
nms_free_static(from_buffer, tinfo->opts->buffer_size);
|
||||
nms_free_static(to_buffer, tinfo->opts->buffer_size);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
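The STATE_RUN branch above is an open-loop pacer: a transaction fires only once the uptime clock has passed next_ts, and next_ts then advances by one generated gap. The same logic in isolation (buffers, sizes and the generator are assumed to be set up by the caller):

#include <cstdint>
#include <cstring>
#include <topo.h>
#include "gen.hh"

static constexpr uint64_t S2NS_SKETCH = 1000000000ULL;

// Perform at most one paced copy of tx_size bytes, wrapping the offset when
// the end of the buffer is reached, as worker_thrd() does above.
static void
paced_copy_once(char *dst, const char *src, size_t buf_size, size_t tx_size,
    size_t *offset, uint64_t *next_ts, Generator *ia_gen)
{
	uint64_t now = topo_uptime_ns();

	if (now < *next_ts)
		return;			// not due yet
	if (*offset + tx_size > buf_size)
		*offset = 0;		// wrap around
	memcpy(dst + *offset, src + *offset, tx_size);
	*offset += tx_size;
	*next_ts += (uint64_t)(ia_gen->generate() * (double)S2NS_SKETCH);
}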
|
||||
memload_generator::memload_generator(cpuset_t *threads, cpuset_t * modes, cpuset_t *target_domain,
|
||||
struct memload_generator_options *opt, bool *success)
|
||||
{
|
||||
*success = false;
|
||||
state.store(STATE_INIT);
|
||||
std::memcpy(&this->opts, opt, sizeof(memload_generator_options));
|
||||
|
||||
int nextcore = CPU_FFS(threads) - 1;
|
||||
int target_domain_id = CPU_FFS(target_domain) - 1;
|
||||
int num_cores = CPU_COUNT(threads);
|
||||
if (target_domain_id < 0 || num_cores == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
double thread_tps = (double)opt->trans_per_second / (double)num_cores;
|
||||
void *local_buffer = nullptr;
|
||||
void *target_buffer = nullptr;
|
||||
int tid = 0;
|
||||
|
||||
if (opts.shared_buffer) {
|
||||
local_buffer = nms_alloc_static(topo_core_to_numa(nextcore),
|
||||
opt->buffer_size);
|
||||
target_buffer = nms_alloc_static(target_domain_id,
|
||||
opt->buffer_size);
|
||||
if (local_buffer == nullptr || target_buffer == nullptr) {
|
||||
*success = false;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
while (nextcore != -1) {
|
||||
auto info = new struct thread_info;
|
||||
cpuset_t cpuset;
|
||||
pthread_attr_t attr;
|
||||
|
||||
info->ia_gen = createGenerator(opts.ia_dist);
|
||||
if (info->ia_gen == nullptr) {
|
||||
goto end;
|
||||
}
|
||||
info->ia_gen->set_lambda(thread_tps);
|
||||
info->init_status.store(0);
|
||||
info->state = &this->state;
|
||||
info->reset_ts.store(false, std::memory_order_relaxed);
|
||||
info->num_trans.store(0);
|
||||
info->opts = &this->opts;
|
||||
info->tid = tid;
|
||||
info->coreid = nextcore;
|
||||
info->target_dom = target_domain_id;
|
||||
info->from_buffer = local_buffer;
|
||||
info->to_buffer = target_buffer;
|
||||
info->pull = CPU_ISSET(nextcore, modes);
|
||||
|
||||
CPU_ZERO(&cpuset);
|
||||
CPU_SET(nextcore, &cpuset);
|
||||
pthread_attr_init(&attr);
|
||||
pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &cpuset);
|
||||
pthread_create(&info->pthr, &attr, worker_thrd, info);
|
||||
|
||||
if (opts.verbose) {
|
||||
fprintf(stdout,
|
||||
"memload_generator: created thread %d on core %d target domain %d\n",
|
||||
tid, nextcore, target_domain_id);
|
||||
}
|
||||
|
||||
thr_infos.push_back(info);
|
||||
|
||||
CPU_CLR(nextcore, threads);
|
||||
nextcore = CPU_FFS(threads) - 1;
|
||||
tid++;
|
||||
}
|
||||
|
||||
for (auto tinfo : thr_infos) {
|
||||
int status;
|
||||
while ((status = tinfo->init_status.load()) != 1) {
|
||||
if (status == -1) {
|
||||
state.store(STATE_END);
|
||||
*success = false;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state.store(STATE_RDY);
|
||||
|
||||
*success = true;
|
||||
end:
|
||||
if (opts.verbose) {
|
||||
fprintf(stdout,
|
||||
"memload_generator: exiting constructor. Success: %d...\n",
|
||||
*success ? 1 : 0);
|
||||
}
|
||||
}
|
||||
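The constructor above pins every worker to its core before the thread starts by attaching a single-CPU affinity set to the pthread attribute. That step in isolation (FreeBSD-specific: pthread_np.h and cpuset_t):

#include <sys/types.h>
#include <sys/cpuset.h>
#include <pthread.h>
#include <pthread_np.h>

// Create a thread whose affinity is fixed to a single core.
static int
spawn_pinned(pthread_t *thr, int coreid, void *(*fn)(void *), void *arg)
{
	cpuset_t cpuset;
	pthread_attr_t attr;

	CPU_ZERO(&cpuset);
	CPU_SET(coreid, &cpuset);
	pthread_attr_init(&attr);
	pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &cpuset);
	return pthread_create(thr, &attr, fn, arg);
}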
|
||||
bool
|
||||
memload_generator::start()
|
||||
{
|
||||
if (this->state.load() == STATE_RDY) {
|
||||
this->state.store(memload_generator::STATE_RUN);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
memload_generator::stop()
|
||||
{
|
||||
if (this->state.load() == STATE_RUN) {
|
||||
this->state.store(memload_generator::STATE_RDY);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
memload_generator::set_transactions(uint64_t tps)
|
||||
{
|
||||
if (this->state.load() != STATE_END &&
|
||||
this->state.load() != STATE_INIT) {
|
||||
for (unsigned int i = 0; i < thr_infos.size(); i++) {
|
||||
thr_infos.at(i)->ia_gen->set_lambda(
|
||||
(double)tps / (double)thr_infos.size());
|
||||
thr_infos.at(i)->reset_ts.store(true,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
memload_generator::get_transactions()
|
||||
{
|
||||
uint64_t total_transactions = 0;
|
||||
for (auto i : thr_infos) {
|
||||
total_transactions += i->num_trans.load();
|
||||
}
|
||||
return total_transactions;
|
||||
}
|
||||
|
||||
memload_generator::~memload_generator()
|
||||
{
|
||||
void *buf1, *buf2;
|
||||
this->state.store(STATE_END);
|
||||
for (auto i : thr_infos) {
|
||||
// XXX: nms_free regions
|
||||
pthread_join(i->pthr, NULL);
|
||||
buf1 = i->from_buffer;
|
||||
buf2 = i->to_buffer;
|
||||
delete i;
|
||||
}
|
||||
|
||||
if (opts.shared_buffer) {
|
||||
nms_free_static(buf1, opts.buffer_size);
|
||||
nms_free_static(buf2, opts.buffer_size);
|
||||
}
|
||||
}
|
127
libnm/nm.cc
Normal file
@ -0,0 +1,127 @@
|
||||
#include <hwloc.h>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "nm.h"
|
||||
|
||||
struct nm_obj {
|
||||
int level;
|
||||
int id;
|
||||
struct nm_obj *parent;
|
||||
std::vector<struct nm_obj *> children;
|
||||
};
|
||||
|
||||
static bool nm_obj_comparator(struct nm_obj * a, struct nm_obj * b)
|
||||
{
|
||||
return a->id < b->id;
|
||||
}
|
||||
|
||||
static std::vector<struct nm_obj *> nodes;
|
||||
static std::vector<struct nm_obj *> cores;
|
||||
static std::vector<struct nm_obj *> cpus;
|
||||
|
||||
std::vector<struct nm_obj *> * nm_get_nodes()
|
||||
{
|
||||
return &nodes;
|
||||
}
|
||||
|
||||
std::vector<struct nm_obj *> * nm_get_cpus()
|
||||
{
|
||||
return &cpus;
|
||||
}
|
||||
|
||||
std::vector<struct nm_obj *> * nm_get_cores()
|
||||
{
|
||||
return &cores;
|
||||
}
|
||||
|
||||
hwloc_obj_t get_parent_type(hwloc_obj_t obj, hwloc_obj_type_t type)
|
||||
{
|
||||
while(obj != nullptr) {
|
||||
if (obj->type == type) {
|
||||
break;
|
||||
}
|
||||
obj = obj->parent;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
// 0 on success
|
||||
// -1 on error
|
||||
int nm_init()
|
||||
{
|
||||
int ret;
|
||||
|
||||
hwloc_topology * topo;
|
||||
if ((ret = hwloc_topology_init(&topo)) != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if ((ret = hwloc_topology_load(topo)) != 0)
|
||||
return ret;
|
||||
|
||||
// populate numa nodes
|
||||
hwloc_obj_t obj = nullptr;
|
||||
while(1) {
|
||||
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PACKAGE, obj);
|
||||
if (obj == nullptr) {
|
||||
break;
|
||||
}
|
||||
|
||||
struct nm_obj * each = new struct nm_obj;
|
||||
each->id = obj->logical_index;
|
||||
each->level = NM_LEVEL_NUMA;
|
||||
each->parent = nullptr;
|
||||
nodes.push_back(each);
|
||||
printf("libnm: identified NUMA node %d\n", each->id);
|
||||
}
|
||||
std::sort(nodes.begin(), nodes.end(), nm_obj_comparator);
|
||||
|
||||
// populate cpus
|
||||
obj = nullptr;
|
||||
while(1) {
|
||||
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_CORE, obj);
|
||||
if (obj == nullptr) {
|
||||
break;
|
||||
}
|
||||
struct nm_obj * each = new struct nm_obj;
|
||||
each->id = obj->logical_index;
|
||||
each->level = NM_LEVEL_CPU;
|
||||
hwloc_obj_t parent = get_parent_type(obj, HWLOC_OBJ_PACKAGE);
|
||||
if (parent == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// XXX: this faults if hwloc's logical indices do not line up with our vectors
|
||||
each->parent = nodes.at(parent->logical_index);
|
||||
each->parent->children.push_back(each);
|
||||
cpus.push_back(each);
|
||||
printf("libnm: identified CPU %d on NUMA node %d\n", each->id, each->parent->id);
|
||||
}
|
||||
std::sort(cpus.begin(), cpus.end(), nm_obj_comparator);
|
||||
|
||||
// populate cores
|
||||
obj = nullptr;
|
||||
while(1) {
|
||||
obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PU, obj);
|
||||
if (obj == nullptr) {
|
||||
break;
|
||||
}
|
||||
struct nm_obj * each = new struct nm_obj;
|
||||
each->id = obj->logical_index;
|
||||
each->level = NM_LEVEL_CORE;
|
||||
hwloc_obj_t parent = get_parent_type(obj, HWLOC_OBJ_CORE);
|
||||
if (parent == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// XXX: this faults if hwloc's logical indices do not line up with our vectors
|
||||
each->parent = cpus.at(parent->logical_index);
|
||||
each->parent->children.push_back(each);
|
||||
cores.push_back(each);
|
||||
printf("libnm: identified core %d on CPU %d, NUMA node %d\n", each->id, each->parent->id, each->parent->parent->id);
|
||||
}
|
||||
std::sort(cores.begin(), cores.end(), nm_obj_comparator);
|
||||
|
||||
return ret;
|
||||
}
|
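nm_init() above discovers the machine by walking one hwloc object type at a time and climbing ->parent pointers to attach children to the right package. The traversal pattern on its own, trimmed of the bookkeeping:

#include <cstdio>
#include <hwloc.h>

// List every processing unit together with its enclosing package.
static int
list_pus(void)
{
	hwloc_topology_t topo;

	if (hwloc_topology_init(&topo) != 0)
		return -1;
	if (hwloc_topology_load(topo) != 0) {
		hwloc_topology_destroy(topo);
		return -1;
	}

	hwloc_obj_t obj = nullptr;
	while ((obj = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PU, obj)) != nullptr) {
		hwloc_obj_t pkg = obj;
		while (pkg != nullptr && pkg->type != HWLOC_OBJ_PACKAGE)
			pkg = pkg->parent;
		printf("PU %u on package %u\n", obj->logical_index,
		    pkg != nullptr ? pkg->logical_index : 0);
	}

	hwloc_topology_destroy(topo);
	return 0;
}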
205
libnms/alloc.c
@ -1,205 +0,0 @@
|
||||
#include <pthread.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/domainset.h>
|
||||
#include <sys/thr.h>
|
||||
#include <sys/mman.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <stdatomic.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <nms.h>
|
||||
|
||||
#define MAX_NUMA_DOMAINS (64)
|
||||
#define MAX_REGIONS (64)
|
||||
#define REGION_SIZE (1024 * 1024 * 1024)
|
||||
#define PAGE_SIZE (4096)
|
||||
|
||||
struct nms_region {
|
||||
uintptr_t start_addr;
|
||||
size_t size;
|
||||
size_t occupied;
|
||||
};
|
||||
|
||||
struct nms_desc {
|
||||
// alloc
|
||||
pthread_mutex_t alloc_lock;
|
||||
|
||||
struct nms_region regions[MAX_NUMA_DOMAINS][MAX_REGIONS];
|
||||
int region_sz[MAX_NUMA_DOMAINS];
|
||||
};
|
||||
|
||||
static _Atomic(int) initialized = 0;
|
||||
static struct nms_desc g_desc;
|
||||
|
||||
void
|
||||
nms_free_static(void * buf, size_t sz)
|
||||
{
|
||||
munmap(buf, sz);
|
||||
return;
|
||||
}
|
||||
|
||||
void *
|
||||
nms_alloc_static(int node_id, size_t sz)
|
||||
{
|
||||
long tid;
|
||||
domainset_t orig_dom;
|
||||
int orig_policy;
|
||||
void * region;
|
||||
|
||||
thr_self(&tid);
|
||||
DOMAINSET_ZERO(&orig_dom);
|
||||
|
||||
// save existing thread's allocation strategy
|
||||
int ret = cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(orig_dom), &orig_dom, &orig_policy);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "libnms: cpuset_getdomain failed with %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
domainset_t tmp_domain;
|
||||
DOMAINSET_ZERO(&tmp_domain);
|
||||
DOMAINSET_SET(node_id, &tmp_domain);
|
||||
|
||||
ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(tmp_domain), &tmp_domain, DOMAINSET_POLICY_ROUNDROBIN);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((region = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE | MAP_PREFAULT_READ, -1, 0)) == MAP_FAILED) {
|
||||
fprintf(stderr, "libnms: mmap failed with %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// touch the pages to prefault the pages
|
||||
int sum = 0;
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
sum += *(uint8_t *)((char *)region + i);
|
||||
*(uint8_t *)((char *)region + i) = i;
|
||||
}
|
||||
|
||||
// restore existing thread's allocation strategy
|
||||
ret = cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, sizeof(orig_dom), &orig_dom, orig_policy);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "libnms: cpuset_setdomain failed with %d\n", errno);
|
||||
munmap(region, REGION_SIZE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return region;
|
||||
}
|
||||
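nms_alloc_static() above relies on first-touch placement: the calling thread's memory domain policy is narrowed to the target NUMA node, the pages are faulted in, and the original policy is restored. A condensed sketch of that sequence (FreeBSD-specific; the superpage and prefault mmap flags from the original are omitted, and the names here are illustrative):

#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <sys/mman.h>
#include <sys/thr.h>

// Map sz anonymous bytes whose pages land on NUMA node node_id.
static void *
alloc_on_domain(int node_id, size_t sz)
{
	long tid;
	domainset_t orig, want;
	int orig_policy;

	thr_self(&tid);
	DOMAINSET_ZERO(&orig);
	if (cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig), &orig, &orig_policy) != 0)
		return nullptr;

	DOMAINSET_ZERO(&want);
	DOMAINSET_SET(node_id, &want);
	if (cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(want), &want, DOMAINSET_POLICY_ROUNDROBIN) != 0)
		return nullptr;

	void *p = mmap(nullptr, sz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	// fault the pages in while the restricted policy is still active
	for (size_t i = 0; p != MAP_FAILED && i < sz; i += 4096)
		((volatile char *)p)[i] = 0;

	// restore the caller's policy regardless of the mmap outcome
	cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid,
	    sizeof(orig), &orig, orig_policy);

	return p == MAP_FAILED ? nullptr : p;
}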
|
||||
static int
|
||||
nms_desc_init(struct nms_desc * desc, int verbose)
|
||||
{
|
||||
memset(desc, 0, sizeof(struct nms_desc));
|
||||
pthread_mutex_init(&desc->alloc_lock, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *
|
||||
nms_region_malloc(struct nms_region * region, size_t size)
|
||||
{
|
||||
void * ret = NULL;
|
||||
if (region->size >= region->occupied + size) {
|
||||
ret = (void *)(region->start_addr + region->occupied);
|
||||
region->occupied += size;
|
||||
region->occupied = (region->occupied + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
nms_desc_add_region(struct nms_desc * desc, int nodeid, size_t size)
|
||||
{
|
||||
void * ret;
|
||||
int idx;
|
||||
|
||||
ret = nms_alloc_static(nodeid, REGION_SIZE);
|
||||
if (ret == NULL) {
|
||||
fprintf(stderr, "libnms: failed to allocate region on node %d\n", nodeid);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
desc->region_sz[nodeid]++;
|
||||
idx = desc->region_sz[nodeid] - 1;
|
||||
desc->regions[nodeid][idx].start_addr = (uintptr_t)ret;
|
||||
desc->regions[nodeid][idx].occupied = 0;
|
||||
desc->regions[nodeid][idx].size = REGION_SIZE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *
|
||||
nms_desc_malloc(struct nms_desc * desc, unsigned int nodeid, size_t size)
|
||||
{
|
||||
void * ret = NULL;
|
||||
int idx;
|
||||
int new_region = 0;
|
||||
|
||||
if (size > REGION_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&desc->alloc_lock);
|
||||
|
||||
retry:
|
||||
if (desc->region_sz[nodeid] > 0) {
|
||||
idx = desc->region_sz[nodeid] - 1;
|
||||
ret = nms_region_malloc(&desc->regions[nodeid][idx], size);
|
||||
}
|
||||
|
||||
if (ret == NULL) {
|
||||
// we need a new region
|
||||
if (nms_desc_add_region(desc, nodeid, REGION_SIZE) != 0) {
|
||||
pthread_mutex_unlock(&desc->alloc_lock);
|
||||
return NULL;
|
||||
}
|
||||
fprintf(stdout, "libnms: malloc request of size %zu -> allocated new region on node %d\n", size, nodeid);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&desc->alloc_lock);
|
||||
return ret;
|
||||
}
|
||||
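nms_region_malloc() above is a plain bump allocator: it hands out the next size bytes of a region if they fit, then rounds the cursor up to a page boundary so the following allocation starts page-aligned. The same idea in isolation (PAGE_SIZE_SKETCH mirrors the 4 KiB constant defined earlier in this file):

#include <cstddef>
#include <cstdint>

static constexpr size_t PAGE_SIZE_SKETCH = 4096;

struct bump_region {
	uintptr_t base;
	size_t size;
	size_t used;
};

// Return the next size bytes of the region, or nullptr when it is exhausted.
static void *
bump_alloc(struct bump_region *r, size_t size)
{
	if (r->used + size > r->size)
		return nullptr;
	void *p = (void *)(r->base + r->used);
	r->used += size;
	// round up so successive allocations stay page-aligned
	r->used = (r->used + PAGE_SIZE_SKETCH - 1) & ~(PAGE_SIZE_SKETCH - 1);
	return p;
}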
|
||||
static void
|
||||
nms_desc_free(struct nms_desc * desc __attribute__((unused)), unsigned int node __attribute__((unused)), void * addr __attribute__((unused)))
|
||||
{
|
||||
// dummy function
|
||||
}
|
||||
|
||||
int
|
||||
nms_init(int verbose)
|
||||
{
|
||||
int expected = 0;
|
||||
if (atomic_compare_exchange_strong(&initialized, &expected, 2)) {
|
||||
nms_desc_init(&g_desc, verbose);
|
||||
atomic_store(&initialized, 1);
|
||||
} else {
|
||||
while(atomic_load(&initialized) != 1) {
|
||||
}
|
||||
fprintf(stdout,"libnms: already initialized.\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
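nms_init() above gates one-time setup with a compare-and-swap: exactly one caller wins the 0 -> 2 transition and runs the initialization, and everyone else spins until the winner publishes state 1. The same gate expressed with std::atomic (names are illustrative):

#include <atomic>

static std::atomic<int> g_initialized { 0 };

// Run setup() exactly once across all callers; later callers wait for it.
static void
init_once(void (*setup)(void))
{
	int expected = 0;

	if (g_initialized.compare_exchange_strong(expected, 2)) {
		setup();			// only the winning thread gets here
		g_initialized.store(1);		// publish "ready"
	} else {
		while (g_initialized.load() != 1) {
			// spin until the winner finishes setup
		}
	}
}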
|
||||
void *
|
||||
nms_malloc(int nodeid, size_t sz)
|
||||
{
|
||||
assert(atomic_load(&initialized) == 1);
|
||||
return nms_desc_malloc(&g_desc, nodeid, sz);
|
||||
}
|
||||
|
||||
void
|
||||
nms_free(int nodeid, void * addr)
|
||||
{
|
||||
assert(atomic_load(&initialized) == 1);
|
||||
nms_desc_free(&g_desc, nodeid, addr);
|
||||
}
|
||||
|
55
libntr/ntr.c
@ -1,46 +1,43 @@
|
||||
#include "ntr.h"
|
||||
|
||||
static int ntr_log_levels[NTR_DEP_MAX] = { NTR_LEVEL_DEFAULT };
|
||||
static FILE *ntr_out;
|
||||
static int ntr_log_levels[NTR_DEP_MAX] = {NTR_LEVEL_DEFAULT};
|
||||
static FILE * ntr_out;
|
||||
|
||||
void
|
||||
ntr_init()
|
||||
void ntr_init()
|
||||
{
|
||||
ntr_out = stdout;
|
||||
ntr_out = stdout;
|
||||
}
|
||||
|
||||
void
|
||||
ntr(int dep, int level, const char *fmt, ...)
|
||||
void ntr(int dep, int level, const char * fmt, ...)
|
||||
{
|
||||
va_list vl;
|
||||
va_start(vl, fmt);
|
||||
if (dep < NTR_DEP_MAX && level <= ntr_log_levels[dep]) {
|
||||
vfprintf(ntr_out, fmt, vl);
|
||||
}
|
||||
va_end(vl);
|
||||
va_list vl;
|
||||
va_start(vl, fmt);
|
||||
if (dep < NTR_DEP_MAX && level <= ntr_log_levels[dep]) {
|
||||
vfprintf(ntr_out, fmt, vl);
|
||||
}
|
||||
va_end(vl);
|
||||
}
|
||||
|
||||
void
|
||||
ntr_set_level(int dep, int level)
|
||||
void ntr_set_level(int dep, int level)
|
||||
{
|
||||
if (dep < NTR_DEP_MAX) {
|
||||
ntr_log_levels[dep] = level;
|
||||
}
|
||||
if (dep < NTR_DEP_MAX) {
|
||||
ntr_log_levels[dep] = level;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ntr_set_output(FILE *f)
|
||||
|
||||
void ntr_set_output(FILE * f)
|
||||
{
|
||||
if (f != NULL) {
|
||||
ntr_out = f;
|
||||
}
|
||||
if (f != NULL) {
|
||||
ntr_out = f;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
ntr_get_level(int dep)
|
||||
|
||||
int ntr_get_level(int dep)
|
||||
{
|
||||
if (dep < NTR_DEP_MAX) {
|
||||
return ntr_log_levels[dep];
|
||||
}
|
||||
return 0;
|
||||
if (dep < NTR_DEP_MAX) {
|
||||
return ntr_log_levels[dep];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
989
net/cat.cc
@ -1,989 +0,0 @@
|
||||
#include <atomic>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include <topo.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "ntr.h"
|
||||
#include "gen.hh"
|
||||
#include "net/netsup.hh"
|
||||
#include "net/pkt.hh"
|
||||
#include "nms.h"
|
||||
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
constexpr static unsigned int MAX_SLAVES = 32;
|
||||
constexpr static unsigned int SLAVES_MAX_WAIT_MS = 1000;
|
||||
|
||||
struct datapt {
|
||||
uint32_t epoch;
|
||||
uint32_t valid;
|
||||
uint64_t clt_hw_tx;
|
||||
uint64_t clt_sw_tx;
|
||||
uint64_t clt_hw_rx;
|
||||
uint64_t clt_sw_rx;
|
||||
uint64_t srv_hw_tx;
|
||||
uint64_t srv_sw_tx;
|
||||
uint64_t srv_hw_rx;
|
||||
uint64_t srv_sw_rx;
|
||||
};
|
||||
|
||||
constexpr static uint32_t STATE_WAIT = 0; // waiting for sending
|
||||
constexpr static uint32_t STATE_SENT = 1; // we sent a packet
|
||||
constexpr static uint32_t STATE_COMPLETE = 2; // we received everything
|
||||
constexpr static uint32_t STATE_PKTLOSS = 3; // last packet sent was lost
|
||||
|
||||
struct options_t {
|
||||
// parameters
|
||||
unsigned int run_time { 5 };
|
||||
unsigned int warmup_time { 3 };
|
||||
char output[256] = "output.txt";
|
||||
char ia_gen_str[256] = "fixed";
|
||||
unsigned int target_qps { 0 };
|
||||
unsigned int master_mode { 0 };
|
||||
struct net_spec server_spec { };
|
||||
cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core
|
||||
std::vector<struct net_spec *> slaves;
|
||||
uint32_t pkt_loss_failure_threshold { 0 };
|
||||
uint32_t pkt_loss_time_ms { UINT32_MAX };
|
||||
int portid { 0 };
|
||||
|
||||
// states
|
||||
struct net_spec s_host_spec { };
|
||||
struct conn_spec s_host_conn {
|
||||
.src = &s_host_spec, .dst = &server_spec, .dst_port = POU_PORT
|
||||
};
|
||||
unsigned int s_rxqid { 0 };
|
||||
unsigned int s_txqid { 0 };
|
||||
unsigned int s_socketid { 0 };
|
||||
// for qps calculation
|
||||
std::atomic<uint32_t> s_recved_pkts { 0 };
|
||||
std::atomic<uint32_t> s_pkt_loss { 0 };
|
||||
std::atomic<uint64_t> s_start_time { 0 };
|
||||
std::atomic<uint64_t> s_end_time { 0 };
|
||||
std::atomic<uint32_t> s_slave_qps { 0 };
|
||||
std::atomic<uint32_t> s_slave_recved { 0 };
|
||||
std::atomic<uint32_t> s_slave_loss { 0 };
|
||||
uint32_t s_state { STATE_WAIT };
|
||||
bool s_hwtimestamp { true };
|
||||
|
||||
Generator *s_iagen { nullptr };
|
||||
std::vector<struct datapt *> s_data;
|
||||
struct datapt *s_last_datapt { nullptr };
|
||||
uint32_t s_epoch { 0 };
|
||||
std::atomic<bool> s_stop { false };
|
||||
std::atomic<uint32_t> s_record { 0 };
|
||||
};
|
||||
|
||||
static struct options_t options;
|
||||
|
||||
static uint16_t
|
||||
rx_add_timestamp(uint16_t port, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused,
|
||||
void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = topo_uptime_ns();
|
||||
struct pkt_hdr *pkt_data;
|
||||
struct timespec ts { };
|
||||
int ret;
|
||||
|
||||
if (options.s_state != STATE_SENT) {
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i],
|
||||
&options.s_host_spec.mac_addr);
|
||||
|
||||
if (pkt_data == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: ignoring invalid packet 0x%p.\n",
|
||||
(void *)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
|
||||
uint32_t epoch = rte_be_to_cpu_32(
|
||||
((struct pkt_payload_epoch *)pkt_data->payload)
|
||||
->epoch);
|
||||
if (options.s_last_datapt != nullptr &&
|
||||
options.s_last_datapt->epoch == epoch) {
|
||||
if (options.s_hwtimestamp) {
|
||||
if ((ret = rte_eth_timesync_read_rx_timestamp(
|
||||
port, &ts, pkts[i]->timesync & 0x3)) ==
|
||||
0) {
|
||||
// has hw rx timestamp
|
||||
options.s_last_datapt->clt_hw_rx =
|
||||
ts.tv_sec * S2NS + ts.tv_nsec;
|
||||
options.s_last_datapt->clt_sw_rx = now;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: tagged packet %p with sw: %lu hw: %lu.\n",
|
||||
(void *)pkts[i], now,
|
||||
options.s_last_datapt->clt_hw_rx);
|
||||
} else {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"rx_add_timestamp: packet %p not tagged - hw ts not "
|
||||
"available - %d.\n",
|
||||
(void *)pkts[i], ret);
|
||||
}
|
||||
} else {
|
||||
options.s_last_datapt->clt_sw_rx = now;
|
||||
options.s_last_datapt->clt_hw_rx = 0;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: tagged packet %p with sw: %lu hw: (disabled).\n",
|
||||
(void *)pkts[i], now);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"rx_add_timestamp: packet %p epoch %d != last epoch %d.\n",
|
||||
(void *)pkts[i], epoch,
|
||||
options.s_last_datapt->epoch);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: packet %p not tagged - type %d.\n",
|
||||
(void *)pkts[i], rte_be_to_cpu_16(pkt_data->type));
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
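rx_add_timestamp() above pairs the software timestamp with the hardware one latched by the NIC; the small index the PMD stores in mbuf->timesync selects which timesync slot to read back. That lookup in isolation, returning nanoseconds or 0 when no hardware timestamp is available (S2NS_SKETCH stands in for the S2NS constant used above):

#include <cstdint>
#include <ctime>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static constexpr uint64_t S2NS_SKETCH = 1000000000ULL;

// Fetch the hardware RX timestamp latched for this mbuf, if any.
static uint64_t
hw_rx_timestamp_ns(uint16_t portid, struct rte_mbuf *m)
{
	struct timespec ts;

	if (rte_eth_timesync_read_rx_timestamp(portid, &ts, m->timesync & 0x3) != 0)
		return 0;
	return (uint64_t)ts.tv_sec * S2NS_SKETCH + (uint64_t)ts.tv_nsec;
}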
|
||||
static uint16_t
|
||||
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = topo_uptime_ns();
|
||||
struct pkt_hdr *pkt_data;
|
||||
|
||||
// if (options.s_state != STATE_SENT) {
|
||||
// return nb_pkts;
|
||||
// }
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i],
|
||||
&options.s_host_spec.mac_addr);
|
||||
|
||||
if (pkt_data == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: ignoring invalid packet 0x%p.\n",
|
||||
(void *)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
|
||||
uint32_t epoch = rte_be_to_cpu_32(
|
||||
((struct pkt_payload_epoch *)pkt_data->payload)
|
||||
->epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr ||
|
||||
epoch != options.s_last_datapt->epoch) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"tx_add_timestamp: packet epoch %d != last epoch %d\n",
|
||||
epoch, options.s_last_datapt->epoch);
|
||||
}
|
||||
|
||||
options.s_last_datapt->clt_sw_tx = now;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: tagged packet %p with sw: %lu.\n",
|
||||
(void *)pkts[i], now);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: packet %p not tagged - type %d.\n",
|
||||
(void *)pkts[i], pkt_data->type);
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
// returns 0 on success
|
||||
static void
|
||||
send_all_slaves(uint16_t type)
|
||||
{
|
||||
struct rte_mbuf *tx_bufs[MAX_SLAVES];
|
||||
//struct rte_eth_stats stats;
|
||||
|
||||
struct conn_spec cspec;
|
||||
cspec.src = &options.s_host_spec;
|
||||
cspec.dst_port = DEFAULT_RAT_PORT;
|
||||
cspec.src_port = DEFAULT_RAT_PORT;
|
||||
|
||||
// send all clients SYNC
|
||||
for (unsigned int i = 0; i < options.slaves.size(); i++) {
|
||||
struct pkt_hdr *hdr;
|
||||
cspec.dst = options.slaves.at(i);
|
||||
if (alloc_pkt_hdr(mempool_get(options.s_socketid), type, &cspec, 0,
|
||||
&tx_bufs[i], &hdr) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to alloc packet\n");
|
||||
}
|
||||
}
|
||||
|
||||
// if (rte_eth_stats_get(options.portid, &stats) != 0 ) {
|
||||
// rte_exit(EXIT_FAILURE, "failed!");
|
||||
// }
|
||||
// printf("send_all_slaves: ipackets %lu, opackets %lu, ierrors %lu, oerrors %lu\n", stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors);
|
||||
|
||||
if (rte_eth_tx_burst(options.portid, options.s_txqid, tx_bufs,
|
||||
options.slaves.size()) != options.slaves.size()) {
|
||||
rte_exit(EXIT_FAILURE, "failed to send some packets\n");
|
||||
}
|
||||
}
|
||||
|
||||
// sizeof mbuf must >= MAX_SLAVES
|
||||
// this function fills up to #slave
|
||||
static void
|
||||
wait_for_slaves(uint16_t etype, struct rte_mbuf **out)
|
||||
{
|
||||
struct rte_mbuf *tx_bufs[MAX_SLAVES];
|
||||
bool stop = false;
|
||||
const uint64_t start = topo_uptime_ns();
|
||||
std::vector<struct rte_ether_addr *> recved;
|
||||
uint32_t tot = 0;
|
||||
|
||||
while (!stop) {
|
||||
uint64_t now = topo_uptime_ns();
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
|
||||
options.s_rxqid, tx_bufs, MAX_SLAVES);
|
||||
|
||||
if (nb_rx > 0) {
|
||||
for (unsigned int i = 0; i < nb_rx; i++) {
|
||||
struct pkt_hdr *each = check_valid_packet(
|
||||
tx_bufs[i], &options.s_host_spec.mac_addr);
|
||||
uint16_t type;
|
||||
if (each == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"wait_for_slaves: ignoring invalid packet %p.\n",
|
||||
(void *)tx_bufs[i]);
|
||||
goto end_loop;
|
||||
}
|
||||
|
||||
type = rte_be_to_cpu_16(each->type);
|
||||
|
||||
if (type == etype) {
|
||||
bool invalid = true;
|
||||
|
||||
// check if it is from one of our
|
||||
// clients
|
||||
for (auto eaddr : options.slaves) {
|
||||
if (rte_is_same_ether_addr(
|
||||
&eaddr->mac_addr,
|
||||
&each->eth_hdr
|
||||
.src_addr)) {
|
||||
invalid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid) {
|
||||
// received invalid packet from
|
||||
// unregistered slave
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"wait_for_slaves: invalid packet %p from unregistered slave\n.",
|
||||
tx_bufs[i]);
|
||||
goto end_loop;
|
||||
}
|
||||
|
||||
invalid = false;
|
||||
// check if we have already received the
|
||||
// same packet from the mac addr
|
||||
for (auto eaddr : recved) {
|
||||
if (rte_is_same_ether_addr(
|
||||
eaddr,
|
||||
&each->eth_hdr
|
||||
.src_addr)) {
|
||||
invalid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid) {
|
||||
// received invalid packet from
|
||||
// the same slave
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"wait_for_slaves: invalid packet %p - duplicated\n.",
|
||||
tx_bufs[i]);
|
||||
goto end_loop;
|
||||
}
|
||||
|
||||
recved.push_back(
|
||||
&each->eth_hdr.src_addr);
|
||||
|
||||
if (recved.size() ==
|
||||
options.slaves.size()) {
|
||||
stop = true;
|
||||
}
|
||||
|
||||
if (out != nullptr) {
|
||||
out[tot] = tx_bufs[i];
|
||||
tot++;
|
||||
// don't free this packet
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"wait_for_slaves: ignoring invalid packet %p type %d.\n",
|
||||
(void *)tx_bufs[i], type);
|
||||
}
|
||||
end_loop:
|
||||
rte_pktmbuf_free(tx_bufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// struct rte_eth_stats stats;
|
||||
// if (rte_eth_stats_get(options.portid, &stats) != 0 ) {
|
||||
// rte_exit(EXIT_FAILURE, "failed!");
|
||||
// }
|
||||
//printf("wait_slaves <AFTER>: ipackets %lu, opackets %lu, ierrors %lu, oerrors %lu\n", stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors);
|
||||
|
||||
if (now - start > SLAVES_MAX_WAIT_MS * MS2NS) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"cat: waiting for too long %d. I QUIT!!", etype);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pkt_loop()
|
||||
{
|
||||
struct rte_mbuf *tx_buf;
|
||||
struct rte_mbuf *rx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
rdport_generator port_gen(MIN_RANDOM_PORT);
|
||||
|
||||
bool read_tx = true;
|
||||
bool recv_stat = true;
|
||||
bool recv_resp = true;
|
||||
|
||||
if (rte_eth_dev_socket_id(options.portid) > 0 &&
|
||||
rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"locore_main: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n",
|
||||
options.portid);
|
||||
}
|
||||
|
||||
uint64_t next_ts = topo_uptime_ns();
|
||||
uint64_t last_send_ts = next_ts;
|
||||
bool is_last_pkt_lost = false;
|
||||
uint32_t num_cts_pkt_lost = 0;
|
||||
|
||||
while (!options.s_stop.load()) {
|
||||
uint64_t now = topo_uptime_ns();
|
||||
// always pop incoming packets
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
|
||||
options.s_rxqid, rx_bufs, BURST_SIZE);
|
||||
|
||||
if (nb_rx > 0) {
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
if (options.s_state != STATE_SENT) {
|
||||
// only need to process packets after we
|
||||
// sent one
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
struct pkt_hdr *each = check_valid_packet(
|
||||
rx_bufs[i], &options.s_host_spec.mac_addr);
|
||||
|
||||
if (each == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: ignoring invalid packet %p.\n",
|
||||
(void *)rx_bufs[i]);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t type = rte_be_to_cpu_16(each->type);
|
||||
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, each,
|
||||
"locore_main: received packet %p ", each);
|
||||
struct pkt_payload_epoch *pld_epoch;
|
||||
struct pkt_payload_stat *pld_stat;
|
||||
uint32_t epoch;
|
||||
switch (type) {
|
||||
case PKT_TYPE_PROBE_RESP:
|
||||
pld_epoch = (struct pkt_payload_epoch *)
|
||||
each->payload;
|
||||
epoch = rte_be_to_cpu_32(
|
||||
pld_epoch->epoch);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "lcore_main: PROBE_RESP received packet %p epoch %d\n", each, epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr ||
|
||||
epoch !=
|
||||
options.s_last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"locore_main: packet %p epoch %d doesn't match datapt %d.\n",
|
||||
(void *)rx_bufs[i], epoch,
|
||||
options.s_last_datapt
|
||||
->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
recv_resp = true;
|
||||
break;
|
||||
case PKT_TYPE_STAT:
|
||||
pld_stat = (struct pkt_payload_stat *)
|
||||
each->payload;
|
||||
epoch = rte_be_to_cpu_32(
|
||||
pld_stat->epoch);
|
||||
|
||||
if (options.s_last_datapt == nullptr ||
|
||||
epoch !=
|
||||
options.s_last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"locore_main: packet %p epoch %d doesn't match datapt %d.\n",
|
||||
(void *)rx_bufs[i], epoch,
|
||||
options.s_last_datapt
|
||||
->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
options.s_last_datapt->srv_hw_tx =
|
||||
rte_be_to_cpu_64(pld_stat->hw_tx);
|
||||
options.s_last_datapt->srv_hw_rx =
|
||||
rte_be_to_cpu_64(pld_stat->hw_rx);
|
||||
options.s_last_datapt->srv_sw_tx =
|
||||
rte_be_to_cpu_64(pld_stat->sw_tx);
|
||||
options.s_last_datapt->srv_sw_rx =
|
||||
rte_be_to_cpu_64(pld_stat->sw_rx);
|
||||
|
||||
recv_stat = true;
|
||||
is_last_pkt_lost = false;
|
||||
break;
|
||||
default:
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: ignoring packet %p with unknown type %d.\n",
|
||||
(void *)rx_bufs[i], type);
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (options.s_state == STATE_SENT) {
|
||||
// check if hw tx ts is read
|
||||
if (!read_tx) {
|
||||
int ret;
|
||||
struct timespec ts;
|
||||
if (options.s_hwtimestamp) {
|
||||
if ((ret = rte_eth_timesync_read_tx_timestamp(
|
||||
options.portid, &ts)) == 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: read hw tx timestamp %lu.\n",
|
||||
(ts.tv_nsec + ts.tv_sec * S2NS));
|
||||
options.s_last_datapt->clt_hw_tx =
|
||||
ts.tv_nsec + ts.tv_sec * S2NS;
|
||||
read_tx = true;
|
||||
}
|
||||
} else {
|
||||
options.s_last_datapt->clt_hw_tx = 0;
|
||||
read_tx = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (read_tx && recv_resp && recv_stat) {
|
||||
options.s_state = STATE_COMPLETE;
|
||||
} else {
|
||||
// check packet loss
|
||||
if (now - last_send_ts >
|
||||
options.pkt_loss_time_ms * MS2NS) {
|
||||
|
||||
if (is_last_pkt_lost) {
|
||||
num_cts_pkt_lost++;
|
||||
} else {
|
||||
is_last_pkt_lost = true;
|
||||
num_cts_pkt_lost = 1;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: packet loss: waiting too long for epoch %d. %d in a row.\n",
|
||||
options.s_last_datapt->epoch,
|
||||
num_cts_pkt_lost);
|
||||
|
||||
delete options.s_last_datapt;
|
||||
options.s_last_datapt = nullptr;
|
||||
options.s_state = STATE_PKTLOSS;
|
||||
options.s_pkt_loss.fetch_add(1);
|
||||
|
||||
if (num_cts_pkt_lost >
|
||||
options
|
||||
.pkt_loss_failure_threshold) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"too many continuous packet loss detected\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (options.s_state == STATE_COMPLETE ||
|
||||
options.s_state == STATE_PKTLOSS ||
|
||||
options.s_state == STATE_WAIT) {
|
||||
if (options.s_state == STATE_COMPLETE) {
|
||||
options.s_data.push_back(options.s_last_datapt);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: datapt for epoch %d dump:\n"
|
||||
" Valid: %d\n"
|
||||
" client TX HW: %lu\n"
|
||||
" client TX SW: %lu\n"
|
||||
" client RX HW: %lu\n"
|
||||
" client RX SW: %lu\n"
|
||||
" server TX HW: %lu\n"
|
||||
" server TX SW: %lu\n"
|
||||
" server RX HW: %lu\n"
|
||||
" server RX SW: %lu\n\n",
|
||||
options.s_last_datapt->epoch,
|
||||
options.s_last_datapt->valid,
|
||||
options.s_last_datapt->clt_hw_tx,
|
||||
options.s_last_datapt->clt_sw_tx,
|
||||
options.s_last_datapt->clt_hw_rx,
|
||||
options.s_last_datapt->clt_sw_rx,
|
||||
options.s_last_datapt->srv_hw_tx,
|
||||
options.s_last_datapt->srv_sw_tx,
|
||||
options.s_last_datapt->srv_hw_rx,
|
||||
options.s_last_datapt->srv_sw_rx);
|
||||
options.s_recved_pkts.fetch_add(1);
|
||||
options.s_last_datapt = nullptr;
|
||||
}
|
||||
|
||||
options.s_state = STATE_WAIT;
|
||||
|
||||
if (now >= next_ts) {
|
||||
struct pkt_payload_epoch *pld_epoch;
|
||||
uint32_t epoch;
|
||||
|
||||
next_ts += (int)(options.s_iagen->generate() *
|
||||
S2NS);
|
||||
|
||||
options.s_host_conn.src_port = port_gen.next();
|
||||
if (alloc_pkt_hdr(mempool_get(options.s_socketid),
|
||||
PKT_TYPE_PROBE, &options.s_host_conn, 0,
|
||||
&tx_buf, &pkt_data) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to alloc probe packet.\n");
|
||||
}
|
||||
|
||||
epoch = options.s_epoch;
|
||||
options.s_epoch++;
|
||||
pld_epoch = (struct pkt_payload_epoch *)
|
||||
pkt_data->payload;
|
||||
pld_epoch->epoch = rte_cpu_to_be_32(epoch);
|
||||
options.s_last_datapt = new struct datapt;
|
||||
options.s_last_datapt->epoch = epoch;
|
||||
options.s_last_datapt->valid =
|
||||
options.s_record.load();
|
||||
|
||||
last_send_ts = now;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: sending packet 0x%p with epoch %d\n",
|
||||
(void *)tx_buf, epoch);
|
||||
const uint16_t nb_tx =
|
||||
rte_eth_tx_burst(options.portid,
|
||||
options.s_txqid, &tx_buf, 1);
|
||||
|
||||
if (nb_tx != 1) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to send packet 0x%p, epoch %d\n",
|
||||
(void *)tx_buf, epoch);
|
||||
}
|
||||
|
||||
// tx_buf was accepted by rte_eth_tx_burst above, so the driver now owns
// and frees it; freeing it here would be a double free
|
||||
|
||||
read_tx = false;
|
||||
recv_resp = false;
|
||||
recv_stat = false;
|
||||
options.s_state = STATE_SENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
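pkt_loop() above tags every probe with an epoch in network byte order and matches replies against it, converting with rte_cpu_to_be_32()/rte_be_to_cpu_32() so both hosts agree regardless of endianness. A self-contained sketch of that tagging (the payload struct here is a stand-in for the one defined in net/pkt.hh):

#include <cstdint>
#include <rte_byteorder.h>

struct pkt_payload_epoch_sketch {
	uint32_t epoch;
};

// Sender side: store the epoch big-endian on the wire.
static void
tag_epoch(struct pkt_payload_epoch_sketch *pld, uint32_t epoch)
{
	pld->epoch = rte_cpu_to_be_32(epoch);
}

// Receiver side: recover the host-order epoch for comparison.
static uint32_t
read_epoch(const struct pkt_payload_epoch_sketch *pld)
{
	return rte_be_to_cpu_32(pld->epoch);
}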
|
||||
static int
|
||||
locore_main(void *tif __rte_unused)
|
||||
{
|
||||
struct rte_mbuf *mbufs[MAX_SLAVES];
|
||||
uint32_t core_id = rte_lcore_id();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d running...\n",
|
||||
core_id);
|
||||
|
||||
if (options.master_mode == 1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: sending SYNC ...\n");
|
||||
send_all_slaves(PKT_TYPE_SYNC);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: waiting for SYNC_ACK ...\n");
|
||||
wait_for_slaves(PKT_TYPE_SYNC_ACK, nullptr);
|
||||
}
|
||||
|
||||
options.s_start_time.store(topo_uptime_ns());
|
||||
pkt_loop();
|
||||
options.s_end_time.store(topo_uptime_ns());
|
||||
|
||||
if (options.master_mode == 1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: sending FIN ...\n");
|
||||
send_all_slaves(PKT_TYPE_FIN);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: waiting for FIN_ACK ...\n");
|
||||
wait_for_slaves(PKT_TYPE_FIN_ACK, mbufs);
|
||||
|
||||
// aggregate slave QPS
|
||||
for (unsigned int i = 0; i < options.slaves.size(); i++) {
|
||||
// these packets already underwent validity check in
|
||||
// wait_for_slaves
|
||||
auto pkt_hdr = rte_pktmbuf_mtod(mbufs[i],
|
||||
struct pkt_hdr *);
|
||||
auto pld_qps = (struct pkt_payload_qps *)
|
||||
pkt_hdr->payload;
|
||||
uint32_t qps = rte_be_to_cpu_32(pld_qps->qps);
|
||||
uint32_t recved = rte_be_to_cpu_32(
|
||||
pld_qps->recved_pkts);
|
||||
uint32_t loss = rte_be_to_cpu_32(pld_qps->lost_pkts);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main: received qps %d from client %d\n",
|
||||
qps, i);
|
||||
options.s_slave_qps.fetch_add(qps);
|
||||
options.s_slave_loss.fetch_add(loss);
|
||||
options.s_slave_recved.fetch_add(recved);
|
||||
rte_pktmbuf_free(mbufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: exited\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"Configuration:\n"
|
||||
" verbosity = +%d\n"
|
||||
" run time = %d\n"
|
||||
" warmup time = %d\n"
|
||||
" output file = %s\n"
|
||||
" number of threads = %d\n"
|
||||
" interarrival dist = %s\n"
|
||||
" target qps = %d\n"
|
||||
" host IP = 0x%x\n"
|
||||
" pkt loss time = %u\n"
|
||||
" pkt loss failure threshold = %u\n"
|
||||
" portid = %d\n",
|
||||
ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time,
|
||||
options.warmup_time, options.output, CPU_COUNT(&options.cpu_set),
|
||||
options.ia_gen_str, options.target_qps, options.s_host_spec.ip,
|
||||
options.pkt_loss_time_ms, options.pkt_loss_failure_threshold,
|
||||
options.portid);
|
||||
|
||||
for (auto slave : options.slaves) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
" slave = 0x%x@%x:%x:%x:%x:%x:%x\n", slave->ip,
|
||||
slave->mac_addr.addr_bytes[0],
|
||||
slave->mac_addr.addr_bytes[1],
|
||||
slave->mac_addr.addr_bytes[2],
|
||||
slave->mac_addr.addr_bytes[3],
|
||||
slave->mac_addr.addr_bytes[4],
|
||||
slave->mac_addr.addr_bytes[5]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n"
|
||||
" -v(vv): verbose mode\n"
|
||||
" -s: server net spec\n"
|
||||
" -S: slave(rat)'s net spec (also turns on master mode)\n"
|
||||
" -t: run time\n"
|
||||
" -T: warmup time\n"
|
||||
" -h: display the information\n"
|
||||
" -o: output filename\n"
|
||||
" -A: affinity mask\n"
|
||||
" -i: inter-arrival time distribution\n"
|
||||
" -q: target qps\n"
|
||||
" -H: host net spec\n"
|
||||
" -L: pkt loss failure threshold\n"
|
||||
" -l: pkt loss time threshold\n");
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
std::ofstream log_file;
|
||||
bool has_host_spec = false;
|
||||
|
||||
ntr_init();
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
struct net_spec *ns;
|
||||
while ((c = getopt(argc, argv, "vs:S:t:T:ho:A:i:q:H:L:l:p:")) !=
|
||||
-1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1,
|
||||
ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 's':
|
||||
if (str_to_netspec(optarg,
|
||||
&options.server_spec) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid server net spec.\n");
|
||||
}
|
||||
break;
|
||||
case 'S':
|
||||
ns = new struct net_spec;
|
||||
if (str_to_netspec(optarg, ns) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid client net spec\n");
|
||||
}
|
||||
options.slaves.push_back(ns);
|
||||
options.master_mode = 1;
|
||||
if (options.slaves.size() > MAX_SLAVES) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"too many rats.\n");
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
options.run_time = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'T':
|
||||
options.warmup_time = strtol(optarg, nullptr,
|
||||
10);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "\n");
|
||||
case 'o':
|
||||
strncpy(options.output, optarg,
|
||||
sizeof(options.output) - 1);
|
||||
break;
|
||||
case 'A':
|
||||
cpulist_to_cpuset(optarg, &options.cpu_set);
|
||||
break;
|
||||
case 'i':
|
||||
strncpy(options.ia_gen_str, optarg,
|
||||
sizeof(options.ia_gen_str) - 1);
|
||||
break;
|
||||
case 'q':
|
||||
options.target_qps = strtoul(optarg, nullptr,
|
||||
10);
|
||||
break;
|
||||
case 'H':
|
||||
has_host_spec = true;
|
||||
if (str_to_netspec(optarg,
|
||||
&options.s_host_spec) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid host net spec.\n");
|
||||
}
|
||||
break;
|
||||
case 'L':
|
||||
options.pkt_loss_failure_threshold =
|
||||
strtoul(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'l':
|
||||
options.pkt_loss_time_ms = strtoul(optarg,
|
||||
nullptr, 10);
|
||||
if (options.pkt_loss_time_ms == 0) {
|
||||
options.pkt_loss_time_ms = UINT32_MAX;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
options.portid = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_FAILURE, "unknown argument: %c\n",
|
||||
c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_host_spec) {
|
||||
rte_exit(EXIT_FAILURE, "must specify host IP\n");
|
||||
}
|
||||
|
||||
// init libtopo
|
||||
if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
|
||||
0) {
|
||||
rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
|
||||
}
|
||||
|
||||
// init nms
|
||||
if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to init libnms!\n");
|
||||
}
|
||||
|
||||
if (CPU_COUNT(&options.cpu_set) != 1) {
|
||||
rte_exit(EXIT_FAILURE, "must specify exactly one core\n");
|
||||
}
|
||||
int core_id = CPU_FFS(&options.cpu_set) - 1;
|
||||
|
||||
dump_options();
|
||||
|
||||
// configure memory and port
|
||||
struct port_conf pconf;
|
||||
struct device_conf dconf;
|
||||
struct mem_conf mconf;
|
||||
portconf_get(options.portid, &pconf);
|
||||
|
||||
if (!pconf.timesync) {
|
||||
options.s_hwtimestamp = false;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"main: timesync disabled. hw timestamp unavailable.\n ");
|
||||
}
|
||||
|
||||
if (CPU_COUNT(&options.cpu_set) > 1) {
|
||||
int ffs = CPU_FFS(&options.cpu_set);
|
||||
CPU_ZERO(&options.cpu_set);
|
||||
CPU_SET(ffs - 1, &options.cpu_set);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "cat only supports one thread, using only core %d.\n", ffs - 1);
|
||||
}
|
||||
|
||||
dconf.mtu = MAX_STANDARD_MTU;
|
||||
CPU_COPY(&options.cpu_set, &dconf.core_affinity);
|
||||
dconf.portid = options.portid;
|
||||
dconf.rss_hf = pconf.rss_hf;
|
||||
dconf.rx_offloads = pconf.rxoffload;
|
||||
dconf.tx_offloads = pconf.txoffload;
|
||||
dconf.timesync = pconf.timesync;
|
||||
|
||||
dconf.rx_fn = rx_add_timestamp;
|
||||
dconf.rx_user = nullptr;
|
||||
dconf.rx_ring_sz = 2048;
|
||||
dconf.tx_fn = tx_add_timestamp;
|
||||
dconf.tx_user = nullptr;
|
||||
dconf.tx_ring_sz = 2048;
|
||||
|
||||
mconf.cache_size = 64;
|
||||
mconf.priv_size = 0;
|
||||
mconf.num_elements = 4096;
|
||||
mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_STANDARD_MTU;
|
||||
mconf.max_pools = -1;
|
||||
|
||||
dpdk_init(&dconf, &mconf);
|
||||
|
||||
if (rte_eth_macaddr_get(options.portid,
|
||||
&options.s_host_spec.mac_addr) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
|
||||
options.portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n",
|
||||
options.portid, options.s_host_spec.mac_addr.addr_bytes[0],
|
||||
options.s_host_spec.mac_addr.addr_bytes[1],
|
||||
options.s_host_spec.mac_addr.addr_bytes[2],
|
||||
options.s_host_spec.mac_addr.addr_bytes[3],
|
||||
options.s_host_spec.mac_addr.addr_bytes[4],
|
||||
options.s_host_spec.mac_addr.addr_bytes[5]);
|
||||
|
||||
// create default generator
|
||||
options.s_iagen = createGenerator(options.ia_gen_str);
|
||||
if (options.s_iagen == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "invalid generator string %s\n",
|
||||
options.ia_gen_str);
|
||||
}
|
||||
options.s_iagen->set_lambda((double)options.target_qps);
|
||||
|
||||
// open log file for writing
|
||||
log_file.open(options.output, std::ofstream::out);
|
||||
if (!log_file) {
|
||||
rte_exit(EXIT_FAILURE, "failed to open log file %s\n",
|
||||
options.output);
|
||||
}
|
||||
|
||||
sleep(INIT_DELAY);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: launching thread on core %d\n", core_id);
|
||||
if (rte_eal_remote_launch(locore_main, nullptr, core_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
|
||||
}
|
||||
|
||||
// XXX: poor man's timer
|
||||
uint32_t second = 0;
|
||||
while (true) {
|
||||
if (second >= options.warmup_time) {
|
||||
options.s_record.store(1);
|
||||
}
|
||||
if (second >= options.run_time + options.warmup_time) {
|
||||
options.s_stop.store(true);
|
||||
break;
|
||||
}
|
||||
usleep(S2US);
|
||||
second++;
|
||||
}
|
||||
|
||||
if (rte_eal_wait_lcore(core_id) < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");
|
||||
|
||||
// calculate QPS
|
||||
uint32_t qps = (double)options.s_recved_pkts.load() /
|
||||
(((double)(options.s_end_time.load() -
|
||||
options.s_start_time.load()) /
|
||||
(double)S2NS));
|
||||
qps += options.s_slave_qps.load();
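// Illustrative worked example (not part of the original file): s_start_time
// and s_end_time are nanosecond timestamps, so dividing their difference by
// S2NS converts the measurement window to seconds. Receiving 4,200,000
// packets over a 6e9 ns (6 s) window gives 700,000 qps locally, and each
// rat's reported qps is then added on top.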
|
||||
|
||||
// dump stats
|
||||
log_file << qps << ',' << options.s_recved_pkts.load() << ','
|
||||
<< options.s_pkt_loss.load() << ','
|
||||
<< options.s_slave_recved.load() << ','
|
||||
<< options.s_slave_loss.load() << std::endl;
|
||||
|
||||
for (auto it : options.s_data) {
|
||||
if (it->valid) {
|
||||
log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
|
||||
<< it->clt_hw_rx << ',' << it->clt_hw_tx << ','
|
||||
<< it->srv_sw_rx << ',' << it->srv_sw_tx << ','
|
||||
<< it->srv_hw_rx << ',' << it->srv_hw_tx
|
||||
<< std::endl;
|
||||
}
|
||||
delete it;
|
||||
}
|
||||
log_file.close();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"qps = %d, recved = %d, loss = %d, slave recved = %d, slave loss = %d\n",
|
||||
qps, options.s_recved_pkts.load(), options.s_pkt_loss.load(),
|
||||
options.s_slave_recved.load(), options.s_slave_loss.load());
|
||||
|
||||
// clean up
|
||||
dpdk_cleanup(&dconf);
|
||||
|
||||
return 0;
|
||||
}
|
701 net/khat.cc
@ -1,701 +0,0 @@
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <vector>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/endian.h>
|
||||
#include <sys/sched.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <topo.h>
|
||||
|
||||
#include <rte_common.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
|
||||
#include "ntr.h"
|
||||
|
||||
//#include "gen.hh"
|
||||
#include "net/netsup.hh"
|
||||
#include "net/pkt.hh"
|
||||
#include "nms.h"
|
||||
#include "rte_byteorder.h"
|
||||
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
constexpr static unsigned int CACHELINE_SIZE = 64;
|
||||
constexpr static uint16_t THREAD_LOAD_BUFFER_SZ = 16384;
|
||||
|
||||
struct probe_state_t {
|
||||
struct net_spec dst;
|
||||
struct conn_spec cspec {
|
||||
.dst = &dst
|
||||
};
|
||||
uint64_t last_sw_rx;
|
||||
uint64_t last_sw_tx;
|
||||
uint64_t last_hw_rx;
|
||||
uint32_t epoch;
|
||||
};
|
||||
|
||||
// Keep track of the probe state.
// When a probe packet first arrives, this state is set to "in flight" and the
// rte_mbuf is marked valid (see mbuf_set_probe_valid), which prevents other
// probe packets from being processed. When the server sends the probe stats
// back to the user, the flag is released. This guarantees that the server
// only processes one probe packet at a time.
// XXX: this could also be attached to the mbuf itself and processed by the
// lcore thread. It is kept global because there can only be one pending probe
// request globally, and rx_add_timestamp can store its results here too.
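// Illustrative sketch (not part of the original file): the "only one probe in
// flight" guarantee boils down to a compare-and-swap on an atomic flag. The
// names below are simplified stand-ins for options.is_probing and the per-mbuf
// valid marker; <atomic> is already included above.
static std::atomic<bool> g_probing { false };

static bool
try_begin_probe()
{
	// only the first PROBE packet flips false -> true; later ones back off
	bool expected = false;
	return g_probing.compare_exchange_strong(expected, true);
}

static void
end_probe()
{
	// released once the STAT reply has been handed to the NIC
	g_probing.store(false);
}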
|
||||
struct thread_info {
|
||||
int tid;
|
||||
int rxqid;
|
||||
int txqid;
|
||||
int lcore_id;
|
||||
int node_id;
|
||||
void *cache_lines;
|
||||
void *load_buffer;
|
||||
};
|
||||
|
||||
struct options_t {
|
||||
// config
|
||||
int num_threads { 1 };
|
||||
cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core
|
||||
bool jumbo_frame_enabled {
|
||||
false
|
||||
}; // setting this to true changes mbuf size and mtu
|
||||
int port_mtu { MAX_STANDARD_MTU };
|
||||
int thread_cacheline_cnt = { 1600 }; // 100KB of data per thread
|
||||
uint16_t portid { 0 };
|
||||
|
||||
// states
|
||||
struct net_spec s_host_spec { };
|
||||
std::vector<struct thread_info *> s_thr_info;
|
||||
int probe_state_offset { 0 };
|
||||
bool s_hwtimestamp { true };
|
||||
|
||||
struct probe_state_t s_probe_info;
|
||||
std::atomic<bool> is_probing { false };
|
||||
};
|
||||
|
||||
struct options_t options;
|
||||
|
||||
static bool
|
||||
mbuf_is_probe_valid(struct rte_mbuf *pkt)
|
||||
{
|
||||
return *RTE_MBUF_DYNFIELD(pkt, options.probe_state_offset, bool *);
|
||||
}
|
||||
|
||||
static void
|
||||
mbuf_set_probe_valid(struct rte_mbuf *pkt, bool b)
|
||||
{
|
||||
*RTE_MBUF_DYNFIELD(pkt, options.probe_state_offset, bool *) = b;
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
rx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused,
|
||||
void *_ __rte_unused)
|
||||
{
|
||||
int rc = 0;
|
||||
uint64_t now = topo_uptime_ns();
|
||||
struct timespec ts { };
|
||||
struct pkt_hdr *pkt_data;
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
pkt_data = check_valid_packet(pkts[i],
|
||||
&options.s_host_spec.mac_addr);
|
||||
|
||||
if (pkt_data == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: ignoring invalid packet %p.\n",
|
||||
(void *)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE) {
|
||||
bool cmp = false;
|
||||
mbuf_set_probe_valid(pkts[i], false);
|
||||
if (options.is_probing.compare_exchange_strong(cmp,
|
||||
true)) {
|
||||
options.s_probe_info.last_sw_rx = now;
|
||||
if (options.s_hwtimestamp) {
|
||||
if ((rc = rte_eth_timesync_read_rx_timestamp(
|
||||
port, &ts,
|
||||
pkts[i]->timesync & 0x3)) ==
|
||||
0) {
|
||||
options.s_probe_info
|
||||
.last_hw_rx = ts.tv_nsec +
|
||||
ts.tv_sec * S2NS;
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: tagged packet %p with sw rx: %lu hw rx:%lu.\n",
|
||||
(void *)pkts[i],
|
||||
options.s_probe_info
|
||||
.last_sw_rx,
|
||||
options.s_probe_info
|
||||
.last_hw_rx);
|
||||
mbuf_set_probe_valid(pkts[i],
|
||||
true);
|
||||
} else {
|
||||
options.is_probing.store(false);
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"rx_add_timestamp: packet %p not tagged - failed to read hw rx timestamp: %d.\n",
|
||||
(void *)pkts[i], rc);
|
||||
}
|
||||
} else {
|
||||
mbuf_set_probe_valid(pkts[i], true);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: tagged packet %p with sw rx only: %lu.\n",
|
||||
(void *)pkts[i], now);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: packet %p not tagged - server is probing.\n",
|
||||
(void *)pkts[i]);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"rx_add_timestamp: packet %p not tagged - not PROBE packet: type %d.\n",
|
||||
(void *)pkts[i], rte_be_to_cpu_16(pkt_data->type));
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
|
||||
struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
|
||||
{
|
||||
uint64_t now = topo_uptime_ns();
|
||||
struct pkt_hdr *pkt_data;
|
||||
|
||||
for (int i = 0; i < nb_pkts; i++) {
|
||||
|
||||
pkt_data = check_valid_packet(pkts[i],
|
||||
&options.s_host_spec.mac_addr);
|
||||
|
||||
if (pkt_data == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: ignoring invalid packet %p.\n",
|
||||
(void *)pkts[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rte_be_to_cpu_16(pkt_data->type) == PKT_TYPE_PROBE_RESP) {
|
||||
// this packet is the response to PROBE packets
|
||||
|
||||
// at this point the packet has not been sent to the NIC yet, so
// the state must be "waiting for stats"
|
||||
assert(options.is_probing.load() &&
|
||||
mbuf_is_probe_valid(pkts[i]));
|
||||
|
||||
options.s_probe_info.last_sw_tx = now;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: tagged packet %p with sw tx %lu\n",
|
||||
(void *)pkts[i], options.s_probe_info.last_sw_tx);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"tx_add_timestamp: packet %p not tagged - type %d\n",
|
||||
(void *)pkts[i], pkt_data->type);
|
||||
}
|
||||
}
|
||||
|
||||
return nb_pkts;
|
||||
}
|
||||
|
||||
static void
|
||||
worker_cpu_load(unsigned long us)
|
||||
{
|
||||
uint64_t now = topo_uptime_ns();
|
||||
while(true) {
|
||||
uint64_t cur = topo_uptime_ns();
|
||||
if (cur - now >= us * 1000) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
worker_memory_load(int tid, uint32_t which, uint32_t load)
|
||||
{
|
||||
uint32_t start_cacheline = which % (options.thread_cacheline_cnt * options.s_thr_info.size());
|
||||
uint32_t thrd = start_cacheline / options.thread_cacheline_cnt;
|
||||
uint32_t start = start_cacheline % options.thread_cacheline_cnt;
|
||||
struct thread_info * cur = options.s_thr_info.at(tid);
|
||||
struct thread_info * tgt = options.s_thr_info.at(thrd);
|
||||
for (uint32_t i = 0; i < load; i++) {
|
||||
*(uint32_t *)cur->load_buffer = *(uint32_t *)((char *)tgt->cache_lines + ((start + i) % options.thread_cacheline_cnt) * CACHELINE_SIZE);
|
||||
}
|
||||
}
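// Illustrative worked example (not part of the original file), assuming the
// default thread_cacheline_cnt of 1600 and 4 worker threads: there are
// 6400 cache lines in total, so a request with which = 6403 maps to
//     start_cacheline = 6403 % 6400 = 3, thrd = 3 / 1600 = 0, start = 3
// and the loop then reads `load` consecutive lines of thread 0's buffer
// (wrapping at 1600), which may touch remote NUMA memory when thrd != tid.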
|
||||
|
||||
static int
|
||||
locore_main(void *ti)
|
||||
{
|
||||
auto tinfo = (struct thread_info *)ti;
|
||||
struct rte_mbuf *bufs[BURST_SIZE];
|
||||
// + 1 because it might involve an extra PKT_TYPE_STAT packet
|
||||
// when all tx timestamps are ready
|
||||
struct rte_mbuf *tx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
// XXX: hack hardcode to be larger than MTU
|
||||
|
||||
bool pending_probe = false;
|
||||
|
||||
if (rte_eth_dev_socket_id(options.portid) > 0 &&
|
||||
rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n",
|
||||
tinfo->tid, options.portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"locore_main <thread %d>: running on locore %d with txqid %d and rxqid %d.\n",
|
||||
tinfo->tid, rte_lcore_id(), tinfo->txqid, tinfo->rxqid);
|
||||
|
||||
while (true) {
|
||||
uint16_t nb_tx = 0;
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
|
||||
tinfo->rxqid, bufs, BURST_SIZE);
|
||||
struct rte_mbuf *pkt_buf;
|
||||
struct pkt_hdr *tx_data;
|
||||
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
// XXX: optimization: every packet has already been validated once in
// rx_add_timestamp; valid packets could be marked there so that this
// redundant check can be skipped
|
||||
pkt_data = check_valid_packet(bufs[i],
|
||||
&options.s_host_spec.mac_addr);
|
||||
|
||||
if (pkt_data == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main <thread %d>: skipping invalid packet %p.\n",
|
||||
tinfo->tid, (void *)bufs[i]);
|
||||
// dump_pkt(bufs[i]);
|
||||
rte_pktmbuf_free(bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, pkt_data,
|
||||
"locore_main <thread %d>: received packet ", tinfo->tid);
|
||||
switch (rte_be_to_cpu_16(pkt_data->type)) {
|
||||
case PKT_TYPE_PROBE: {
|
||||
if (mbuf_is_probe_valid(bufs[i])) {
|
||||
// send back a PROBE_RESP packet to measure
// the return latency
|
||||
pending_probe = true;
|
||||
|
||||
// bookkeep the probe results
|
||||
options.s_probe_info.epoch =
|
||||
rte_be_to_cpu_32(
|
||||
((struct pkt_payload_epoch *)
|
||||
pkt_data->payload)
|
||||
->epoch);
|
||||
|
||||
pkt_hdr_to_netspec(pkt_data,
|
||||
&options.s_probe_info.dst,
|
||||
&options.s_probe_info.cspec
|
||||
.dst_port,
|
||||
nullptr,
|
||||
&options.s_probe_info.cspec
|
||||
.src_port);
|
||||
|
||||
options.s_probe_info.cspec.src =
|
||||
&options.s_host_spec;
|
||||
|
||||
if (alloc_pkt_hdr(mempool_get(
|
||||
tinfo->node_id),
|
||||
PKT_TYPE_PROBE_RESP,
|
||||
&options.s_probe_info.cspec, 0,
|
||||
&pkt_buf, &tx_data) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to allocate pkt\n");
|
||||
}
|
||||
|
||||
rte_memcpy(tx_data->payload,
|
||||
pkt_data->payload,
|
||||
sizeof(struct pkt_payload_epoch));
|
||||
|
||||
mbuf_set_probe_valid(pkt_buf, true);
|
||||
|
||||
// queue for burst send
|
||||
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, tx_data,
|
||||
"locore_main <thread %d>: sending packet ", tinfo->tid);
|
||||
tx_bufs[nb_tx++] = pkt_buf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PKT_TYPE_LOAD: {
|
||||
struct conn_spec cspec;
|
||||
struct net_spec src;
|
||||
struct net_spec dst;
|
||||
|
||||
// touch the unused data to pretend that we read
|
||||
// those dummy fields
|
||||
memcpy(tinfo->load_buffer, pkt_data->payload,
|
||||
MIN(bufs[i]->data_len -
|
||||
sizeof(struct pkt_hdr),
|
||||
THREAD_LOAD_BUFFER_SZ));
|
||||
|
||||
// perform the load
|
||||
auto pld = (struct pkt_payload_load *)
|
||||
pkt_data->payload;
|
||||
|
||||
uint32_t load_type = rte_be_to_cpu_32(pld->type);
|
||||
uint32_t load_arg0 = rte_be_to_cpu_32(pld->arg0);
|
||||
uint32_t load_arg1 = rte_be_to_cpu_32(pld->arg1);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main <thread %d>: LOAD type %d, arg0 %d, arg1 %d\n",
|
||||
tinfo->tid, load_type, load_arg0, load_arg1);
|
||||
|
||||
if (load_type == LOAD_TYPE_CPU) {
|
||||
worker_cpu_load(load_arg0);
|
||||
} else if (load_type == LOAD_TYPE_MEM) {
|
||||
worker_memory_load(tinfo->tid, load_arg0, load_arg1);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"locore_main <thread %d>: unknown LOAD type %d, ignoring...", tinfo->tid, load_type);
|
||||
break;
|
||||
}
|
||||
|
||||
// reply
|
||||
pkt_hdr_to_netspec(pkt_data, &src,
|
||||
&cspec.dst_port, &dst, &cspec.src_port);
|
||||
cspec.dst = &src;
|
||||
cspec.src = &dst;
|
||||
|
||||
// printf("LOAD PKT SIZE: %d\n",
|
||||
// bufs[i]->data_len); we reply to load packet
|
||||
// regardless of the server state
|
||||
if (alloc_pkt_hdr(mempool_get(tinfo->node_id),
|
||||
PKT_TYPE_LOAD_RESP, &cspec, 0, &pkt_buf,
|
||||
&tx_data) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to allocate pkt\n");
|
||||
}
|
||||
|
||||
rte_memcpy(tx_data->payload, pkt_data->payload,
|
||||
sizeof(struct pkt_payload_load));
|
||||
|
||||
// queue for burst send
|
||||
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, tx_data,
|
||||
"locore_main <thread %d>: sending packet ", tinfo->tid);
|
||||
tx_bufs[nb_tx++] = pkt_buf;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main <thread %d>: ignoring packet %p with unknown type %d.\n",
|
||||
tinfo->tid, (void *)bufs[i],
|
||||
rte_be_to_cpu_16(pkt_data->type));
|
||||
break;
|
||||
}
|
||||
rte_pktmbuf_free(bufs[i]);
|
||||
}
|
||||
|
||||
// send all packets
|
||||
tx_burst_all(options.portid, tinfo->txqid, tx_bufs, nb_tx);
|
||||
|
||||
// check on every loop iteration, not only when packets have arrived
|
||||
if (pending_probe) {
|
||||
assert(options.is_probing.load());
|
||||
struct timespec ts { };
|
||||
struct pkt_payload_stat *stat;
|
||||
int status = 0;
|
||||
if (options.s_hwtimestamp) {
|
||||
if ((status = rte_eth_timesync_read_tx_timestamp(
|
||||
options.portid, &ts)) == 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main <thread %d>: obtained hw tx timestamp %lu.\n",
|
||||
tinfo->tid,
|
||||
(ts.tv_sec * S2NS + ts.tv_nsec));
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"locore_main <thread %d>: failed to obtain hw tx timestamp: %d.\n",
|
||||
tinfo->tid, status);
|
||||
}
|
||||
}
|
||||
if (status == 0) {
|
||||
// now we have everything we need
|
||||
|
||||
if (alloc_pkt_hdr(mempool_get(tinfo->node_id),
|
||||
PKT_TYPE_STAT, &options.s_probe_info.cspec, 0,
|
||||
&pkt_buf, &tx_data) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to alloc pkt_buf\n");
|
||||
}
|
||||
|
||||
// populate stats
|
||||
stat = (struct pkt_payload_stat *)tx_data->payload;
|
||||
stat->epoch = rte_cpu_to_be_32(
|
||||
options.s_probe_info.epoch);
|
||||
if (options.s_hwtimestamp) {
|
||||
stat->hw_rx = rte_cpu_to_be_64(
|
||||
options.s_probe_info.last_hw_rx);
|
||||
stat->hw_tx = rte_cpu_to_be_64(
|
||||
ts.tv_nsec + ts.tv_sec * S2NS);
|
||||
} else {
|
||||
stat->hw_rx = 0;
|
||||
stat->hw_tx = 0;
|
||||
}
|
||||
stat->sw_rx = rte_cpu_to_be_64(
|
||||
options.s_probe_info.last_sw_rx);
|
||||
stat->sw_tx = rte_cpu_to_be_64(
|
||||
options.s_probe_info.last_sw_tx);
|
||||
|
||||
// send the packet
|
||||
tx_burst_all(options.portid, tinfo->txqid, &pkt_buf, 1);
|
||||
|
||||
// release the probing flag
|
||||
pending_probe = false;
|
||||
options.is_probing.store(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n"
|
||||
" -v(vv): verbose mode\n"
|
||||
" -h: seek help\n"
|
||||
" -A: cpu list for worker threads\n"
|
||||
" -m: enable memory load generator(MLG)\n"
|
||||
" -b: MLG trunk size\n"
|
||||
" -x: MLG thread affinity mask\n"
|
||||
" -X: MLG target domain affinity mask\n"
|
||||
" -S: MLG shared buffer\n"
|
||||
" -H: host spec\n"
|
||||
" -J: enable jumbo frames\n"
|
||||
" -p: port id\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: khat configuration:\n"
|
||||
" verbosity: +%d\n"
|
||||
" thread count: %d\n"
|
||||
" ip: 0x%x\n"
|
||||
" jumbo frame: %d\n"
|
||||
" port id: %d\n",
|
||||
ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING,
|
||||
options.num_threads, options.s_host_spec.ip,
|
||||
options.jumbo_frame_enabled, options.portid);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
bool has_host_spec { false };
|
||||
struct mem_conf mconf;
|
||||
struct device_conf dconf;
|
||||
|
||||
ntr_init();
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while ((c = getopt(argc, argv, "hvA:H:Jp:")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1,
|
||||
ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "\n");
|
||||
case 'A':
|
||||
cpulist_to_cpuset(optarg, &options.cpu_set);
|
||||
options.num_threads = CPU_COUNT(
|
||||
&options.cpu_set);
|
||||
if (options.num_threads == 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"must run at least one thread\n");
|
||||
}
|
||||
break;
|
||||
case 'H':
|
||||
if (str_to_netspec(optarg,
|
||||
&options.s_host_spec) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid host spec\n");
|
||||
}
|
||||
has_host_spec = true;
|
||||
break;
|
||||
case 'J':
|
||||
options.jumbo_frame_enabled = true;
|
||||
options.port_mtu = MAX_JUMBO_MTU;
|
||||
break;
|
||||
case 'p':
|
||||
options.portid = atoi(optarg);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "unknown argument: %c",
|
||||
c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_host_spec) {
|
||||
rte_exit(EXIT_FAILURE, "Must specify host spec\n");
|
||||
}
|
||||
|
||||
// init libtopo
|
||||
if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
|
||||
0) {
|
||||
rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
|
||||
}
|
||||
|
||||
// init libnms
|
||||
if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "libnms init failed!\n");
|
||||
}
|
||||
|
||||
dump_options();
|
||||
|
||||
// register dynamic field
|
||||
struct rte_mbuf_dynfield rte_mbuf_dynfield_probe_flag = {
|
||||
.name = "rte_mbuf_dynfield_probe_valid",
|
||||
.size = sizeof(bool),
|
||||
.align = __alignof__(uint32_t),
|
||||
.flags = 0
|
||||
};
|
||||
options.probe_state_offset = rte_mbuf_dynfield_register(
|
||||
&rte_mbuf_dynfield_probe_flag);
|
||||
if (options.probe_state_offset == -1) {
|
||||
rte_exit(EXIT_FAILURE, "failed to register dynamic field: %d\n",
|
||||
rte_errno);
|
||||
}
|
||||
|
||||
// configure memory and port
|
||||
struct port_conf pconf;
|
||||
portconf_get(options.portid, &pconf);
|
||||
if (!pconf.timesync) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"main: timesync disabled. hw timestamp unavailable.\n ");
|
||||
options.s_hwtimestamp = false;
|
||||
}
|
||||
dconf.mtu = options.port_mtu;
|
||||
CPU_COPY(&options.cpu_set, &dconf.core_affinity);
|
||||
dconf.portid = options.portid;
|
||||
dconf.rss_hf = pconf.rss_hf;
|
||||
dconf.rx_offloads = pconf.rxoffload;
|
||||
dconf.tx_offloads = pconf.txoffload;
|
||||
dconf.timesync = pconf.timesync;
|
||||
|
||||
dconf.rx_fn = rx_add_timestamp;
|
||||
dconf.rx_user = nullptr;
|
||||
dconf.rx_ring_sz = 2048;
|
||||
dconf.tx_fn = tx_add_timestamp;
|
||||
dconf.tx_user = nullptr;
|
||||
dconf.tx_ring_sz = 2048;
|
||||
|
||||
mconf.cache_size = 512;
|
||||
mconf.priv_size = 0;
|
||||
mconf.num_elements = (dconf.rx_ring_sz + dconf.tx_ring_sz) *
|
||||
rte_lcore_count() / rte_socket_count();
|
||||
mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_JUMBO_MTU -
|
||||
MAX_STANDARD_MTU;
|
||||
mconf.max_pools = -1;
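// Illustrative worked example (not part of the original file): the pool is
// sized to back every rx/tx descriptor on every lcore. Assuming 8 lcores and
// 2 sockets with the 2048-entry rings above, each per-socket pool gets
// (2048 + 2048) * 8 / 2 = 16384 mbufs, each large enough for a jumbo frame
// (RTE_MBUF_DEFAULT_BUF_SIZE plus the jumbo/standard MTU difference).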
|
||||
|
||||
dpdk_init(&dconf, &mconf);
|
||||
|
||||
if (rte_eth_macaddr_get(options.portid,
|
||||
&options.s_host_spec.mac_addr) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
|
||||
options.portid);
|
||||
}
|
||||
|
||||
// init threads
|
||||
uint32_t cpu_idx = CPU_FFS(&options.cpu_set);
|
||||
uint32_t tid = 0;
|
||||
while (cpu_idx != 0) {
|
||||
uint32_t lcore_id = cpu_idx - 1;
|
||||
uint32_t node_id = rte_lcore_to_socket_id(lcore_id);
|
||||
auto *tinfo = (struct thread_info *)nms_malloc(node_id,
|
||||
sizeof(struct thread_info));
|
||||
tinfo->cache_lines = nms_malloc(node_id,
|
||||
CACHELINE_SIZE * options.thread_cacheline_cnt);
|
||||
tinfo->load_buffer = nms_malloc(node_id,
|
||||
THREAD_LOAD_BUFFER_SZ);
|
||||
tinfo->tid = tid;
|
||||
tinfo->lcore_id = lcore_id;
|
||||
tinfo->node_id = node_id;
|
||||
tinfo->rxqid = tid;
|
||||
tinfo->txqid = tid;
|
||||
options.s_thr_info.push_back(tinfo);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: thread %d assigned to cpu %d, node %d\n", tinfo->tid,
|
||||
tinfo->lcore_id, topo_core_to_numa(lcore_id));
|
||||
|
||||
tid++;
|
||||
CPU_CLR(cpu_idx - 1, &options.cpu_set);
|
||||
cpu_idx = CPU_FFS(&options.cpu_set);
|
||||
}
|
||||
|
||||
sleep(INIT_DELAY);
|
||||
|
||||
for (int i = 0; i < options.num_threads; i++) {
|
||||
struct thread_info *tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: launching thread %d on locore %d\n", tinfo->tid,
|
||||
tinfo->lcore_id);
|
||||
if (rte_eal_remote_launch(locore_main,
|
||||
(void *)options.s_thr_info.at(i),
|
||||
tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to launch function on locore %d\n",
|
||||
tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
while (true) {
|
||||
usleep(S2US);
|
||||
}
|
||||
|
||||
// shouldn't get here
|
||||
// clean up
|
||||
for (int i = 0; i < options.num_threads; i++) {
|
||||
struct thread_info *tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: waiting for locore %d...\n", tinfo->lcore_id);
|
||||
if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n",
|
||||
tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
dpdk_cleanup(&dconf);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,204 +0,0 @@
|
||||
#include "net/netsup.hh"
|
||||
#include <cstdlib>
|
||||
|
||||
#include "rte_build_config.h"
|
||||
#include "rte_common.h"
|
||||
#include "rte_config.h"
|
||||
#include "rte_ether.h"
|
||||
#include "rte_lcore.h"
|
||||
#include "rte_mempool.h"
|
||||
#include "rte_mbuf.h"
|
||||
#include "rte_errno.h"
|
||||
#include "rte_ethdev.h"
|
||||
|
||||
#include "ntr.h"
|
||||
|
||||
static struct rte_mempool *g_mempools[MAX_NUMA_NODES] = {nullptr};
|
||||
static unsigned int g_mempool_sz = 0;
|
||||
|
||||
static void
|
||||
mempool_init(struct mem_conf *mconf)
|
||||
{
|
||||
struct rte_mempool * mbuf_pool;
|
||||
char mempool_name[64];
|
||||
|
||||
for (int i = 0; i < (int)rte_socket_count(); i++) {
|
||||
uint32_t nodeid = i;
|
||||
// ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
// "mempool_init: creating mempool for node %d\n", nodeid);
|
||||
|
||||
// create one mbuf pool per socket
|
||||
snprintf(mempool_name, sizeof(mempool_name), "net_mempool_%d", nodeid);
|
||||
|
||||
mbuf_pool = rte_pktmbuf_pool_create(mempool_name, mconf->num_elements,
|
||||
mconf->cache_size, mconf->priv_size,
|
||||
mconf->data_room_size, nodeid);
|
||||
|
||||
if (mbuf_pool == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "cannot create mbuf pool: %d\n", rte_errno);
|
||||
}
|
||||
|
||||
g_mempools[nodeid] = mbuf_pool;
|
||||
g_mempool_sz++;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "mempool_init: created mempool for node %d\n", nodeid);
|
||||
}
|
||||
}
|
||||
|
||||
struct rte_mempool *
|
||||
mempool_get(int nodeid)
|
||||
{
|
||||
if ((unsigned int)nodeid < g_mempool_sz) {
|
||||
return g_mempools[nodeid];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static void
|
||||
port_init(struct device_conf *dconf)
|
||||
{
|
||||
struct rte_ether_addr addr;
|
||||
struct rte_eth_dev_info dev_info {
|
||||
};
|
||||
struct rte_eth_conf port_conf;
|
||||
struct rte_eth_txconf txconf {
|
||||
};
|
||||
struct rte_eth_rxconf rxconf {
|
||||
};
|
||||
int ret;
|
||||
|
||||
int num_threads = CPU_COUNT(&dconf->core_affinity);
|
||||
if (rte_eth_dev_count_avail() == 0) {
|
||||
rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
|
||||
}
|
||||
|
||||
if (!rte_eth_dev_is_valid_port(dconf->portid)) {
|
||||
rte_exit(EXIT_FAILURE, "cannot find port %d\n", dconf->portid);
|
||||
}
|
||||
|
||||
if ((ret = rte_eth_macaddr_get(dconf->portid, &addr)) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port: %d\n", ret);
|
||||
}
|
||||
|
||||
ret = rte_eth_dev_info_get(dconf->portid, &dev_info);
|
||||
if (ret != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to get dev info: %d", ret);
|
||||
}
|
||||
|
||||
memset(&port_conf, 0, sizeof(struct rte_eth_conf));
|
||||
port_conf.rxmode.mtu = dconf->mtu;
|
||||
port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
|
||||
port_conf.rx_adv_conf.rss_conf.rss_key = nullptr;
|
||||
port_conf.rx_adv_conf.rss_conf.rss_hf = dconf->rss_hf;
|
||||
|
||||
port_conf.rxmode.offloads = dconf->rx_offloads;
|
||||
port_conf.txmode.offloads = dconf->tx_offloads;
|
||||
|
||||
/* Configure the Ethernet device. */
|
||||
ret = rte_eth_dev_configure(dconf->portid, num_threads, num_threads, &port_conf);
|
||||
if (ret != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to configure port: %d\n", ret);
|
||||
|
||||
ret = rte_eth_dev_adjust_nb_rx_tx_desc(dconf->portid, &dconf->rx_ring_sz, &dconf->tx_ring_sz);
|
||||
if (ret != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to set rx tx queue size: %d\n", ret);
|
||||
|
||||
/* Allocate and set up 1 RX queue per thread per Ethernet port. */
|
||||
rxconf = dev_info.default_rxconf;
|
||||
rxconf.offloads = port_conf.rxmode.offloads;
|
||||
rxconf.rx_nseg = 0;
|
||||
rxconf.rx_seg = nullptr;
|
||||
txconf = dev_info.default_txconf;
|
||||
txconf.offloads = port_conf.txmode.offloads;
|
||||
|
||||
int core;
|
||||
int qid = 0;
|
||||
CPU_FOREACH_ISSET(core, &dconf->core_affinity) {
|
||||
int socket = rte_lcore_to_socket_id(core);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "port_init: setting up rx & tx queue for core %d (socket %d)...\n", core, socket);
|
||||
ret = rte_eth_rx_queue_setup(dconf->portid, qid, dconf->rx_ring_sz, socket, &rxconf, mempool_get(socket));
|
||||
if (ret < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to setup rx queue for core %d: %d\n", core, ret);
|
||||
|
||||
ret = rte_eth_tx_queue_setup(dconf->portid, qid, dconf->tx_ring_sz, socket, &txconf);
|
||||
if (ret < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to setup tx queue for core %d: %d", core, ret);
|
||||
|
||||
qid++;
|
||||
}
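// Illustrative worked example (not part of the original file): with a core
// affinity of {2, 4}, qid 0 is set up for core 2 and qid 1 for core 4, and
// each queue draws its mbufs from the mempool of that core's NUMA socket.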
|
||||
|
||||
// set mtu
|
||||
ret = rte_eth_dev_set_mtu(dconf->portid, dconf->mtu);
|
||||
if (ret != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to set mtu: %d\n", ret);
|
||||
|
||||
ret = rte_eth_dev_start(dconf->portid);
|
||||
if (ret < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to start port: %d\n", ret);
|
||||
|
||||
if (dconf->timesync) {
|
||||
ret = rte_eth_timesync_enable(dconf->portid);
|
||||
if (ret != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to enable timesync: %d\n", ret);
|
||||
}
|
||||
|
||||
/* Enable RX in promiscuous mode for the Ethernet device. */
|
||||
ret = rte_eth_promiscuous_enable(dconf->portid);
|
||||
if (ret != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to enable promiscuous mode: %d\n", ret);
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
if (dconf->tx_fn != nullptr) {
|
||||
if (rte_eth_add_tx_callback(dconf->portid, i, dconf->tx_fn, dconf->tx_user) == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "failed to attach callback to tx queue %d\n", i);
|
||||
}
|
||||
}
|
||||
|
||||
if (dconf->rx_fn != nullptr) {
|
||||
if (rte_eth_add_rx_callback(dconf->portid, i, dconf->rx_fn, dconf->rx_user) == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "failed to attach callback to rx queue %d\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sync_port_clock(portid);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"port_init: configured port %d on socket %d with mac addr %x:%x:%x:%x:%x:%x\n",
|
||||
dconf->portid, rte_eth_dev_socket_id(dconf->portid),
|
||||
addr.addr_bytes[0],
|
||||
addr.addr_bytes[1],
|
||||
addr.addr_bytes[2],
|
||||
addr.addr_bytes[3],
|
||||
addr.addr_bytes[4],
|
||||
addr.addr_bytes[5]);
|
||||
}
|
||||
|
||||
void
|
||||
dpdk_init(struct device_conf *dconf, struct mem_conf *mconf)
|
||||
{
|
||||
if (rte_socket_count() > (int)MAX_NUMA_NODES) {
|
||||
rte_exit(EXIT_FAILURE, "too many numa nodes\n");
|
||||
}
|
||||
|
||||
// ensure 1-1 mapping
|
||||
for (int i = 0; i < (int)rte_socket_count(); i++) {
|
||||
if (rte_socket_id_by_idx(i) != i) {
|
||||
rte_exit(EXIT_FAILURE, "socket %d has id %d instead.\n", i, rte_socket_id_by_idx(i));
|
||||
}
|
||||
}
|
||||
|
||||
mempool_init(mconf);
|
||||
|
||||
port_init(dconf);
|
||||
}
|
||||
|
||||
void
|
||||
dpdk_cleanup(struct device_conf * dconf)
|
||||
{
|
||||
rte_eth_dev_stop(dconf->portid);
|
||||
rte_eth_dev_close(dconf->portid);
|
||||
|
||||
for (int i = 0; i < (int)rte_socket_count(); i++) {
|
||||
rte_mempool_free(g_mempools[i]);
|
||||
}
|
||||
}
|
@ -1,66 +0,0 @@
|
||||
#include "rte_ethdev.h"
|
||||
#include "net/netsup.hh"
|
||||
#include <cstdlib>
|
||||
|
||||
static struct port_conf port_confs[] = {
|
||||
{
|
||||
.driver_name = "net_cxgbe",
|
||||
.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
|
||||
.txoffload = RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
|
||||
.rss_hf = RTE_ETH_RSS_UDP | RTE_ETH_RSS_FRAG_IPV4,
|
||||
.timesync = false
|
||||
},
|
||||
{
|
||||
.driver_name = "net_i40e",
|
||||
.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
|
||||
.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
|
||||
.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
|
||||
.timesync = false
|
||||
},
|
||||
{
|
||||
.driver_name = "net_ice",
|
||||
.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_TIMESTAMP,
|
||||
.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
|
||||
.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
|
||||
.timesync = false
|
||||
},
|
||||
{
|
||||
.driver_name = "net_ixgbe",
|
||||
.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
|
||||
.txoffload = RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
|
||||
.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP,
|
||||
.timesync = true
|
||||
}
|
||||
};
|
||||
|
||||
static struct port_conf default_conf = {
|
||||
.driver_name = "default",
|
||||
.rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_TIMESTAMP,
|
||||
.txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM,
|
||||
.rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD,
|
||||
.timesync = true
|
||||
};
|
||||
|
||||
static const int port_size = sizeof(port_confs) / sizeof(port_confs[0]);
|
||||
|
||||
int
|
||||
portconf_get(int portid, struct port_conf * out)
|
||||
{
|
||||
struct rte_eth_dev_info dev_info {};
|
||||
|
||||
if (rte_eth_dev_info_get(portid, &dev_info) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to obtain device info for port %d\n", portid);
|
||||
}
|
||||
|
||||
for(int i = 0; i < port_size; i++) {
|
||||
struct port_conf * conf = &port_confs[i];
|
||||
if (strcmp(conf->driver_name, dev_info.driver_name) == 0) {
|
||||
memcpy(out, conf, sizeof(struct port_conf));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stdout, "portconf_get: unable to find matching conf for port %d:%s, returning default conf.\n", portid, dev_info.driver_name);
|
||||
memcpy(out, &default_conf, sizeof(struct port_conf));
|
||||
return -1;
|
||||
}
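// Illustrative usage sketch (not part of the original file): callers treat a
// non-zero return as "no driver-specific entry; default_conf was copied in":
//
//   struct port_conf pconf;
//   if (portconf_get(portid, &pconf) != 0) {
//       /* unknown driver: pconf now holds default_conf */
//   }
//   bool use_hw_timestamp = pconf.timesync;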
|
909 net/rat.cc
@ -1,909 +0,0 @@
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/endian.h>
|
||||
|
||||
#include <topo.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "ntr.h"
|
||||
|
||||
#include "gen.hh"
|
||||
#include "net/netsup.hh"
|
||||
#include "net/pkt.hh"
|
||||
#include "nms.h"
|
||||
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
|
||||
static unsigned int
|
||||
epoch_mk(unsigned int id, unsigned int epoch)
|
||||
{
|
||||
return (id << 24) | epoch;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
epoch_get_id(unsigned int epoch)
|
||||
{
|
||||
return epoch >> 24;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
epoch_get_epoch(unsigned int epoch)
|
||||
{
|
||||
return epoch & 0x00FFFFFF;
|
||||
}
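// Illustrative worked example (not part of the original file): the epoch word
// packs the issuing thread id into the top 8 bits and a 24-bit sequence
// number into the rest, so a LOAD_RESP can be routed back to the thread that
// sent the request:
//
//   epoch_mk(3, 0x000102)       == 0x03000102
//   epoch_get_id(0x03000102)    == 3
//   epoch_get_epoch(0x03000102) == 0x000102
//
// Note that epoch_mk does not mask its second argument, so sequence numbers
// beyond 2^24 - 1 would spill into the id bits.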
|
||||
|
||||
struct epoch_info {
|
||||
unsigned int epoch;
|
||||
uint64_t ts;
|
||||
};
|
||||
|
||||
struct thread_info {
|
||||
unsigned int id { 0 };
|
||||
unsigned int lcore_id { 0 };
|
||||
unsigned int rxqid { 0 };
|
||||
unsigned int txqid { 0 };
|
||||
int socket_id;
|
||||
// this field is read by the stat collecting thread
|
||||
std::atomic<int> recved_pkts { 0 };
|
||||
std::atomic<int> lost_pkts { 0 };
|
||||
|
||||
Generator *ia_gen { nullptr };
|
||||
Generator *load_gen0 { nullptr };
|
||||
Generator *load_gen1 { nullptr };
|
||||
|
||||
std::mutex
|
||||
mtx; // this lock protects data shared between worker threads, i.e.:
|
||||
std::list<struct epoch_info *> recved_epochs;
|
||||
|
||||
thread_info() = default;
|
||||
};
|
||||
|
||||
constexpr static int STATE_SYNC = 0; // waiting for SYNC
|
||||
constexpr static int STATE_SYNC_ACK = 1; // waiting to send SYNC_ACK
|
||||
constexpr static int STATE_RUNNING = 2; // Running
|
||||
constexpr static int STATE_FIN = 3; // FIN received
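// Illustrative summary (not part of the original file) of how these states
// drive the rat side of the protocol below:
//   STATE_SYNC     - wait for SYNC from the cat (master)
//   STATE_SYNC_ACK - reply with SYNC_ACK, then move to RUNNING
//   STATE_RUNNING  - generate load until a FIN arrives
//   STATE_FIN      - report qps/received/lost via FIN_ACK and stop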
|
||||
|
||||
constexpr static int WORKLOAD_MAX_ARGS = 2;
|
||||
|
||||
struct options_t {
|
||||
unsigned int run_time { 5 };
|
||||
// parameters
|
||||
int slave_mode { 0 };
|
||||
uint32_t rage_quit_time { UINT32_MAX };
|
||||
char ia_gen[256] { "fixed:0" };
|
||||
char load_gen[WORKLOAD_MAX_ARGS][256] = {{"fixed:0"}, {"fixed:0"}};
|
||||
uint32_t workload_type {LOAD_TYPE_CPU};
|
||||
uint32_t target_qps { 0 };
|
||||
uint32_t depth { 1 };
|
||||
struct net_spec server_spec { };
|
||||
cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 1 thread @ core 2
|
||||
uint32_t pkt_loss_delay_ms { UINT32_MAX };
|
||||
bool jumbo_frame_enabled { false };
|
||||
int pkt_pad_sz { 0 };
|
||||
int port_mtu { MAX_STANDARD_MTU };
|
||||
int portid { 0 };
|
||||
|
||||
// states
|
||||
unsigned int s_num_threads { 1 }; // 1 thread
|
||||
struct net_spec s_host_spec { };
|
||||
struct net_spec s_master_spec { };
|
||||
struct conn_spec s_master_cspec {
|
||||
.src = &s_host_spec, .src_port = DEFAULT_RAT_PORT,
|
||||
.dst = &s_master_spec, .dst_port = DEFAULT_RAT_PORT,
|
||||
};
|
||||
std::vector<struct thread_info *> s_thr_info;
|
||||
std::atomic<int> s_state { STATE_RUNNING }; // default non master mode
|
||||
|
||||
// states for qps
|
||||
std::atomic<uint64_t> s_ts_begin { 0 };
|
||||
};
|
||||
|
||||
static struct options_t options;
|
||||
|
||||
static inline void
|
||||
calc_stats(uint64_t now, uint32_t *qps, uint32_t *recved_pkt,
|
||||
uint32_t *total_loss)
|
||||
{
|
||||
uint32_t recv = 0;
|
||||
uint32_t loss = 0;
|
||||
|
||||
for (auto i : options.s_thr_info) {
|
||||
recv += i->recved_pkts.load();
|
||||
loss += i->lost_pkts.load();
|
||||
}
|
||||
|
||||
if (recved_pkt != nullptr) {
|
||||
*recved_pkt = recv;
|
||||
}
|
||||
|
||||
if (total_loss != nullptr) {
|
||||
*total_loss = loss;
|
||||
}
|
||||
|
||||
if (qps != nullptr) {
|
||||
*qps = (uint32_t)((double)(recv) /
|
||||
((double)(now - options.s_ts_begin.load()) / (double)S2NS));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
proto_loop(struct thread_info *tinfo)
|
||||
{
|
||||
struct rte_mbuf *tx_buf;
|
||||
struct rte_mbuf *rx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"proto_loop <thread %d>: waiting for SYNC from cat\n", tinfo->id);
|
||||
while (options.s_state.load() == STATE_SYNC) {
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
|
||||
tinfo->rxqid, rx_bufs, BURST_SIZE);
|
||||
if (nb_rx > 0) {
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
struct pkt_hdr *each = check_valid_packet(
|
||||
rx_bufs[i], &options.s_host_spec.mac_addr);
|
||||
|
||||
if (each != nullptr) {
|
||||
uint16_t type = rte_be_to_cpu_16(
|
||||
each->type);
|
||||
if (type == PKT_TYPE_SYNC) {
|
||||
int expected = STATE_SYNC;
|
||||
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_INFO,
|
||||
"proto_loop <thread %d>: received SYNC from cat\n",
|
||||
tinfo->id);
|
||||
|
||||
if (!options.s_state
|
||||
.compare_exchange_strong(
|
||||
expected,
|
||||
STATE_SYNC_ACK)) {
|
||||
// someone else changed the state first,
// defer to them
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"proto_loop <thread %d>: failed to cmpxchg sync_recv.\n",
|
||||
tinfo->id);
|
||||
} else {
|
||||
pkt_hdr_to_netspec(each,
|
||||
&options
|
||||
.s_master_spec,
|
||||
nullptr, nullptr,
|
||||
nullptr);
|
||||
|
||||
if (alloc_pkt_hdr(
|
||||
mempool_get(
|
||||
tinfo
|
||||
->socket_id),
|
||||
PKT_TYPE_SYNC_ACK,
|
||||
&options
|
||||
.s_master_cspec,
|
||||
0, &tx_buf,
|
||||
&pkt_data) !=
|
||||
0) {
|
||||
rte_exit(
|
||||
EXIT_FAILURE,
|
||||
"failed to alloc pkt hdr\n");
|
||||
}
|
||||
|
||||
tx_burst_all(
|
||||
options.portid,
|
||||
tinfo->txqid,
|
||||
&tx_buf, 1);
|
||||
|
||||
expected =
|
||||
STATE_SYNC_ACK;
|
||||
// we've done our job,
|
||||
// set off the threads
|
||||
if (!options.s_state
|
||||
.compare_exchange_strong(
|
||||
expected,
|
||||
STATE_RUNNING)) {
|
||||
rte_exit(
|
||||
EXIT_FAILURE,
|
||||
"state unexpectedly changed\n");
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_INFO,
|
||||
"proto_loop <thread %d>: sent SYNC_ACK to cat\n",
|
||||
tinfo->id);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_DEBUG,
|
||||
"proto_loop <thread %d>: ignoring invalid packet %p type %d.\n",
|
||||
tinfo->id,
|
||||
(void *)rx_bufs[i], type);
|
||||
}
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"proto_loop <thread %d>: ignoring invalid packet %p.\n",
|
||||
tinfo->id, (void *)rx_bufs[i]);
|
||||
//dump_pkt(rx_bufs[i]);
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"proto_loop <thread %d>: exiting loop...\n", tinfo->id);
|
||||
}
|
||||
|
||||
static void
|
||||
pkt_loop(struct thread_info *tinfo)
|
||||
{
|
||||
struct rte_mbuf *tx_bufs[BURST_SIZE];
|
||||
struct rte_mbuf *rx_bufs[BURST_SIZE];
|
||||
std::vector<struct epoch_info *> recved_epochs;
|
||||
std::map<unsigned int, struct epoch_info *> sent_epochs;
|
||||
uint64_t cur_epoch = 0;
|
||||
uint64_t next_ts;
|
||||
uint64_t last_recv_ts = 0;
|
||||
struct conn_spec srv_cspec;
|
||||
rdport_generator src_port_gen(MIN_RANDOM_PORT);
|
||||
rdport_generator dst_port_gen(MIN_RANDOM_PORT);
|
||||
|
||||
srv_cspec.src = &options.s_host_spec;
|
||||
srv_cspec.dst = &options.server_spec;
|
||||
|
||||
next_ts = topo_uptime_ns();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "pkt_loop <thread %d>: entering\n",
|
||||
tinfo->id);
|
||||
|
||||
while (options.s_state.load() == STATE_RUNNING) {
|
||||
uint64_t now = topo_uptime_ns();
|
||||
// always pop incoming packets
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.portid,
|
||||
tinfo->rxqid, rx_bufs, BURST_SIZE);
|
||||
|
||||
if (nb_rx > 0) {
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
struct pkt_hdr *each = check_valid_packet(
|
||||
rx_bufs[i], &options.s_host_spec.mac_addr);
|
||||
|
||||
if (each == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: ignoring invalid packet %p.\n",
|
||||
tinfo->id, (void *)rx_bufs[i]);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t type = rte_be_to_cpu_16(each->type);
|
||||
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, each,
|
||||
"locore_main <thread %d>: ", tinfo->id);
|
||||
struct pkt_payload_epoch *pld_epoch;
|
||||
struct epoch_info *einfo;
|
||||
uint32_t epoch;
|
||||
uint32_t id;
|
||||
struct thread_info *other_t;
|
||||
int int_expected = STATE_RUNNING;
|
||||
switch (type) {
|
||||
case PKT_TYPE_LOAD_RESP:
|
||||
pld_epoch = (struct pkt_payload_epoch *)
|
||||
each->payload;
|
||||
epoch = rte_be_to_cpu_32(
|
||||
pld_epoch->epoch);
|
||||
id = epoch_get_id(epoch);
|
||||
|
||||
// printf("Load resp size : %d\n",
|
||||
// rx_bufs[i]->data_len);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: packet %p epoch 0x%x id %d.\n",
|
||||
tinfo->id, (void *)rx_bufs[i],
|
||||
epoch, id);
|
||||
|
||||
if (id >= options.s_num_threads) {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"pkt_loop <thread %d>: packet %p invalid id %d.\n",
|
||||
tinfo->id,
|
||||
(void *)rx_bufs[i], id);
|
||||
break;
|
||||
}
|
||||
|
||||
einfo = new struct epoch_info;
|
||||
einfo->epoch = epoch;
|
||||
einfo->ts = now;
|
||||
|
||||
other_t = options.s_thr_info.at(id);
|
||||
other_t->mtx.lock();
|
||||
other_t->recved_epochs.push_back(einfo);
|
||||
other_t->mtx.unlock();
|
||||
|
||||
break;
|
||||
case PKT_TYPE_FIN:
|
||||
if (rte_is_same_ether_addr(
|
||||
&each->eth_hdr.src_addr,
|
||||
&options.s_master_spec
|
||||
.mac_addr)) {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: recved FIN from cat.\n",
|
||||
tinfo->id);
|
||||
// master told us to stop!
|
||||
if (!options.s_state
|
||||
.compare_exchange_strong(
|
||||
int_expected,
|
||||
STATE_FIN)) {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"pkt_loop <thread %d>: failed to cmpxchg state.\n",
|
||||
tinfo->id);
|
||||
}
|
||||
|
||||
uint32_t qps;
|
||||
uint32_t total_recv;
|
||||
uint32_t total_loss;
|
||||
|
||||
calc_stats(now, &qps,
|
||||
&total_recv, &total_loss);
|
||||
|
||||
struct pkt_hdr *pkt_hdr;
|
||||
if (alloc_pkt_hdr(
|
||||
mempool_get(
|
||||
tinfo->socket_id),
|
||||
PKT_TYPE_FIN_ACK,
|
||||
&options.s_master_cspec,
|
||||
0, &tx_bufs[0],
|
||||
&pkt_hdr) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to allocate pkt hdr\n");
|
||||
}
|
||||
|
||||
auto pld_qps =
|
||||
(struct pkt_payload_qps *)
|
||||
pkt_hdr->payload;
|
||||
pld_qps->qps = rte_cpu_to_be_32(
|
||||
qps);
|
||||
pld_qps->recved_pkts =
|
||||
rte_cpu_to_be_32(
|
||||
total_recv);
|
||||
pld_qps->lost_pkts =
|
||||
rte_cpu_to_be_32(
|
||||
total_loss);
|
||||
|
||||
tx_burst_all(options.portid,
|
||||
tinfo->txqid, &tx_bufs[0],
|
||||
1);
|
||||
|
||||
options.s_state.store(
|
||||
STATE_FIN);
|
||||
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: sent FIN_ACK to cat. QPS = %d.\n",
|
||||
tinfo->id, qps);
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1,
|
||||
NTR_LEVEL_WARNING,
|
||||
"pkt_loop <thread %d>: invalid FIN packet from a different cat.\n",
|
||||
tinfo->id);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop: ignoring packet %p with unknown type %d.\n",
|
||||
(void *)rx_bufs[i], type);
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// dequeue received epochs
|
||||
struct epoch_info *einfo;
|
||||
tinfo->mtx.lock();
|
||||
while (!tinfo->recved_epochs.empty()) {
|
||||
// only dequeue, process later
|
||||
einfo = tinfo->recved_epochs.front();
|
||||
tinfo->recved_epochs.pop_front();
|
||||
|
||||
// XXX: push_back might call into the allocator; avoiding that would
// require a fixed-size array and batching, which adds complexity that
// isn't worth it
|
||||
recved_epochs.push_back(einfo);
|
||||
}
|
||||
tinfo->mtx.unlock();
|
||||
|
||||
if (!recved_epochs.empty())
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: dequeued %lu received epochs\n",
|
||||
tinfo->id, recved_epochs.size());
|
||||
|
||||
// process epochs
|
||||
while (!recved_epochs.empty()) {
|
||||
einfo = recved_epochs.back();
|
||||
recved_epochs.pop_back();
|
||||
|
||||
auto it = sent_epochs.find(einfo->epoch);
|
||||
if (it != sent_epochs.end()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: received epoch 0x%x\n",
|
||||
tinfo->id, epoch_get_epoch(einfo->epoch));
|
||||
|
||||
if (einfo->ts > last_recv_ts) {
|
||||
last_recv_ts = einfo->ts;
|
||||
}
|
||||
delete it->second;
|
||||
sent_epochs.erase(it);
|
||||
tinfo->recved_pkts.fetch_add(1);
|
||||
} else {
|
||||
// we received an epoch we never sent
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: received epoch 0x%x but never sent it. Packet loss?\n",
|
||||
tinfo->id, einfo->epoch);
|
||||
}
|
||||
delete einfo;
|
||||
}
|
||||
|
||||
// handle packet loss
|
||||
for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
|
||||
einfo = it->second;
|
||||
if (now - einfo->ts >
|
||||
options.pkt_loss_delay_ms * MS2NS) {
|
||||
// timed out
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: epoch 0x%x is lost after not receiving for too long\n",
|
||||
tinfo->id, einfo->epoch);
|
||||
|
||||
delete it->second;
|
||||
it = sent_epochs.erase(it);
|
||||
tinfo->lost_pkts.fetch_add(1);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
// check to send the next packet
|
||||
uint32_t total_send = 0;
|
||||
while (now >= next_ts && sent_epochs.size() < options.depth &&
|
||||
total_send < BURST_SIZE) {
|
||||
struct pkt_payload_load *pld_load;
|
||||
struct pkt_hdr *pkt_data;
|
||||
next_ts += (int)(tinfo->ia_gen->generate() * S2NS);
|
||||
|
||||
// change dst port for every packet for RSS
|
||||
srv_cspec.dst_port = dst_port_gen.next();
|
||||
srv_cspec.src_port = src_port_gen.next();
|
||||
if (alloc_pkt_hdr(mempool_get(tinfo->socket_id),
|
||||
PKT_TYPE_LOAD, &srv_cspec, options.pkt_pad_sz,
|
||||
&tx_bufs[total_send], &pkt_data) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to allocate pkt hdr\n");
|
||||
}
|
||||
|
||||
pld_load = (struct pkt_payload_load *)pkt_data->payload;
|
||||
pld_load->type = rte_cpu_to_be_32(options.workload_type);
|
||||
pld_load->arg0 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen0->generate());
|
||||
pld_load->arg1 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen1->generate());
|
||||
unsigned int epoch = epoch_mk(tinfo->id, cur_epoch);
|
||||
pld_load->epoch = rte_cpu_to_be_32(epoch);
|
||||
cur_epoch++;
|
||||
|
||||
einfo = new struct epoch_info;
|
||||
einfo->epoch = epoch;
|
||||
einfo->ts = now;
|
||||
sent_epochs.insert({ epoch, einfo });
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: sending packet %p with epoch 0x%x\n",
|
||||
tinfo->id, (void *)tx_bufs[total_send], epoch);
|
||||
|
||||
total_send++;
|
||||
}
|
||||
|
||||
tx_burst_all(options.portid, tinfo->txqid, tx_bufs, total_send);
|
||||
|
||||
// check rage quit only when we have sent a packet
|
||||
if (last_recv_ts == 0) {
|
||||
last_recv_ts = topo_uptime_ns();
|
||||
}
|
||||
if (topo_uptime_ns() >
|
||||
options.rage_quit_time * MS2NS + last_recv_ts) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"rat: thread %d waiting too long for resp. I F QUIT!\n",
|
||||
tinfo->id);
|
||||
}
|
||||
}
|
||||
|
||||
// clean up
|
||||
for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
|
||||
delete it->second;
|
||||
++it;
|
||||
}
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
|
||||
"pkt_loop <thread %d>: exiting loop...\n", tinfo->id);
|
||||
}
|
||||
|
||||
static int
|
||||
locore_main(void *tif)
|
||||
{
|
||||
auto tinfo = (struct thread_info *)tif;
|
||||
uint32_t core_id = rte_lcore_id();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"locore_main <thread %d>: running on core %d rxqid %d txqid %d...\n", tinfo->id,
|
||||
core_id, tinfo->rxqid, tinfo->txqid);
|
||||
|
||||
if (rte_eth_dev_socket_id(options.portid) > 0 &&
|
||||
rte_eth_dev_socket_id(options.portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n",
|
||||
tinfo->id, options.portid);
|
||||
}
|
||||
|
||||
if (options.slave_mode == 1) {
|
||||
// perform rat protocol
|
||||
proto_loop(tinfo);
|
||||
}
|
||||
|
||||
// wait for the primary thread sending SYNC_ACK
|
||||
while (options.s_state.load() != STATE_RUNNING) {
|
||||
}
|
||||
// store the current timestamp
|
||||
options.s_ts_begin.store(topo_uptime_ns());
|
||||
pkt_loop(tinfo);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: exited\n",
|
||||
tinfo->id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"Configuration:\n"
|
||||
" verbosity = +%d\n"
|
||||
" run time = %d\n"
|
||||
" num threads = %d\n"
|
||||
" rage quit time = %ul\n"
|
||||
" slave mode = %d\n"
|
||||
" interarrival dist = %s\n"
|
||||
" workload type = %d\n"
|
||||
" workload arg0 = %s\n"
|
||||
" workload arg1 = %s\n"
|
||||
" qps = %d\n"
|
||||
" host IP = 0x%x\n"
|
||||
" depth = %u\n"
|
||||
" packet loss time threshold = %u\n"
|
||||
" jumbo frame = %d\n"
|
||||
" packet pad size = %d\n"
|
||||
" portid = %d\n",
|
||||
ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time,
|
||||
options.s_num_threads, options.rage_quit_time, options.slave_mode,
|
||||
options.ia_gen, options.workload_type, options.load_gen[0], options.load_gen[1], options.target_qps,
|
||||
options.s_host_spec.ip, options.depth, options.pkt_loss_delay_ms,
|
||||
options.jumbo_frame_enabled, options.pkt_pad_sz, options.portid);
|
||||
}
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n"
|
||||
" -v(vv): verbose mode\n"
|
||||
" -h: display the information\n"
|
||||
" -t: run time\n"
|
||||
" -s: server net spec\n"
|
||||
" -S: slave(rat) mode\n"
|
||||
" -A: affinity mask\n"
|
||||
" -i: inter-arrival time distribution\n"
|
||||
" -w: workload type\n"
|
||||
" -w (repeated): workload arg0 distribution\n"
|
||||
" -w (repeated): workload arg1 distribution\n"
|
||||
" -r: rage quit time (in ms)\n"
|
||||
" -q: target QPS\n"
|
||||
" -H: host net spec\n"
|
||||
" -D: max number of packets in flight\n"
|
||||
" -l: packet loss time threshold\n"
|
||||
" -J: enable jumbo frame\n"
|
||||
" -P: pad load packets to this size\n"
|
||||
" -p: portid\n");
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
struct thread_info *tinfo;
|
||||
bool has_host_spec = false;
|
||||
|
||||
ntr_init();
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
int num_of_ws = 0;
|
||||
// parse arguments
|
||||
while ((c = getopt(argc, argv,
|
||||
"vht:s:SA:i:w:r:q:H:D:l:JP:p:")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1,
|
||||
ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "\n");
|
||||
case 't':
|
||||
options.run_time = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
case 's':
|
||||
if (str_to_netspec(optarg,
|
||||
&options.server_spec) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid server net spec\n");
|
||||
}
|
||||
break;
|
||||
case 'S':
|
||||
options.slave_mode = 1;
|
||||
options.s_state =
|
||||
STATE_SYNC; // set state to wait for SYNC
|
||||
break;
|
||||
case 'A':
|
||||
cpulist_to_cpuset(optarg, &options.cpu_set);
|
||||
options.s_num_threads = CPU_COUNT(
|
||||
&options.cpu_set);
|
||||
if (options.s_num_threads == 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid cpu mask %s\n", optarg);
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
strncpy(options.ia_gen, optarg,
|
||||
sizeof(options.ia_gen) - 1);
|
||||
break;
|
||||
case 'w':
|
||||
if (num_of_ws == 0) {
|
||||
options.workload_type = strtol(optarg, NULL, 10);
|
||||
if (options.workload_type >= LOAD_TYPE_MAX) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid workload type %s\n", optarg);
|
||||
}
|
||||
} else if (num_of_ws <= WORKLOAD_MAX_ARGS) {
|
||||
strncpy(options.load_gen[num_of_ws - 1], optarg, 255);
|
||||
}
|
||||
|
||||
num_of_ws++;
|
||||
break;
|
||||
case 'r':
|
||||
options.rage_quit_time = strtol(optarg, nullptr,
|
||||
10);
|
||||
break;
|
||||
case 'q':
|
||||
options.target_qps = strtol(optarg, nullptr,
|
||||
10);
|
||||
break;
|
||||
case 'H':
|
||||
has_host_spec = true;
|
||||
if (str_to_netspec(optarg,
|
||||
&options.s_host_spec) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid host net spec.\n");
|
||||
}
|
||||
break;
|
||||
case 'D':
|
||||
options.depth = strtol(optarg, nullptr, 10);
|
||||
if (options.depth == 0) {
|
||||
options.depth = UINT32_MAX;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
options.pkt_loss_delay_ms = strtol(optarg,
|
||||
nullptr, 10);
|
||||
if (options.pkt_loss_delay_ms == 0) {
|
||||
options.pkt_loss_delay_ms = UINT32_MAX;
|
||||
}
|
||||
break;
|
||||
case 'J':
|
||||
options.jumbo_frame_enabled = true;
|
||||
options.port_mtu = MAX_JUMBO_MTU;
|
||||
break;
|
||||
case 'P':
|
||||
options.pkt_pad_sz = strtol(optarg, nullptr,
|
||||
10);
|
||||
break;
|
||||
case 'p':
|
||||
options.portid = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_FAILURE, "unknown argument: %c\n",
|
||||
c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (options.pkt_pad_sz != 0 &&
|
||||
options.pkt_pad_sz > mtu_to_pkt_size(options.port_mtu)) {
|
||||
rte_exit(EXIT_FAILURE, "pkt_pad_sz is too large for mtu %d\n",
|
||||
options.port_mtu);
|
||||
}
|
||||
|
||||
if (!has_host_spec) {
|
||||
rte_exit(EXIT_FAILURE, "Must specify host IP.\n");
|
||||
}
|
||||
|
||||
// init libtopo
|
||||
if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
|
||||
0) {
|
||||
rte_exit(EXIT_FAILURE, "libtopo init failed!\n");
|
||||
}
|
||||
|
||||
if (nms_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) !=
|
||||
0) {
|
||||
rte_exit(EXIT_FAILURE, "libnms init failed!\n");
|
||||
}
|
||||
|
||||
dump_options();
|
||||
|
||||
// configure memory and port
|
||||
struct port_conf pconf;
|
||||
struct device_conf dconf;
|
||||
struct mem_conf mconf;
|
||||
portconf_get(options.portid, &pconf);
|
||||
if (!pconf.timesync) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
|
||||
"main: timesync disabled. hw timestamp unavailable.\n ");
|
||||
}
|
||||
dconf.mtu = options.port_mtu;
|
||||
CPU_COPY(&options.cpu_set, &dconf.core_affinity);
|
||||
dconf.portid = options.portid;
|
||||
dconf.rss_hf = pconf.rss_hf;
|
||||
dconf.rx_offloads = pconf.rxoffload;
|
||||
dconf.tx_offloads = pconf.txoffload;
|
||||
dconf.timesync = pconf.timesync;
|
||||
|
||||
dconf.rx_fn = nullptr;
|
||||
dconf.rx_user = nullptr;
|
||||
dconf.rx_ring_sz = 2048;
|
||||
dconf.tx_fn = nullptr;
|
||||
dconf.tx_user = nullptr;
|
||||
dconf.tx_ring_sz = 2048;
|
||||
|
||||
mconf.cache_size = 512;
|
||||
mconf.priv_size = 0;
|
||||
mconf.num_elements = (dconf.rx_ring_sz + dconf.tx_ring_sz) *
|
||||
rte_lcore_count() / rte_socket_count();
|
||||
mconf.data_room_size = RTE_MBUF_DEFAULT_BUF_SIZE + MAX_JUMBO_MTU -
|
||||
MAX_STANDARD_MTU;
|
||||
mconf.max_pools = -1;
|
||||
|
||||
dpdk_init(&dconf, &mconf);
|
||||
|
||||
if (rte_eth_macaddr_get(options.portid,
|
||||
&options.s_host_spec.mac_addr) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
|
||||
options.portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n",
|
||||
options.portid, options.s_host_spec.mac_addr.addr_bytes[0],
|
||||
options.s_host_spec.mac_addr.addr_bytes[1],
|
||||
options.s_host_spec.mac_addr.addr_bytes[2],
|
||||
options.s_host_spec.mac_addr.addr_bytes[3],
|
||||
options.s_host_spec.mac_addr.addr_bytes[4],
|
||||
options.s_host_spec.mac_addr.addr_bytes[5]);
|
||||
|
||||
unsigned int cpuset_idx = CPU_FFS(&options.cpu_set);
|
||||
unsigned int tid = 0;
|
||||
while (cpuset_idx != 0) {
|
||||
unsigned int lcore_id = cpuset_idx - 1;
|
||||
tinfo = new thread_info;
|
||||
tinfo->ia_gen = createGenerator(options.ia_gen);
|
||||
tinfo->load_gen0 = createGenerator(options.load_gen[0]);
|
||||
tinfo->load_gen1 = createGenerator(options.load_gen[1]);
|
||||
if (tinfo->ia_gen == nullptr || tinfo->load_gen0 == nullptr || tinfo->load_gen1 == nullptr) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"invalid ia_gen or ld_gen string\n");
|
||||
}
|
||||
tinfo->ia_gen->set_lambda((double)options.target_qps /
|
||||
(double)(options.s_num_threads));
|
||||
tinfo->id = tid;
|
||||
tinfo->lcore_id = lcore_id;
|
||||
tinfo->socket_id = rte_lcore_to_socket_id(lcore_id);
|
||||
tinfo->rxqid = tid;
|
||||
tinfo->txqid = tid;
|
||||
options.s_thr_info.push_back(tinfo);
|
||||
|
||||
tid++;
|
||||
CPU_CLR(lcore_id, &options.cpu_set);
|
||||
cpuset_idx = CPU_FFS(&options.cpu_set);
|
||||
}
|
||||
|
||||
sleep(INIT_DELAY);
|
||||
|
||||
for (unsigned int i = 0; i < options.s_num_threads; i++) {
|
||||
tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: launching thread %d on locore %d\n", tinfo->id,
|
||||
tinfo->lcore_id);
|
||||
if (rte_eal_remote_launch(locore_main,
|
||||
(void *)options.s_thr_info.at(i),
|
||||
tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE,
|
||||
"failed to launch function on locore %d\n",
|
||||
tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
// poor man's timer
|
||||
uint32_t second = 0;
|
||||
// in slave mode this loop exits when SYNC_FIN arrives;
// in non-slave mode it exits on its own once run_time elapses
|
||||
while (options.s_state.load() != STATE_FIN) {
|
||||
if (options.slave_mode != 1) {
|
||||
if (second >= options.run_time) {
|
||||
options.s_state.store(STATE_FIN);
|
||||
break;
|
||||
}
|
||||
usleep(1 * S2US);
|
||||
second++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < options.s_num_threads; i++) {
|
||||
tinfo = options.s_thr_info.at(i);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
|
||||
"main: waiting for locore %d...\n", tinfo->lcore_id);
|
||||
if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n",
|
||||
tinfo->lcore_id);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t qps;
|
||||
uint32_t total_recv;
|
||||
uint32_t total_loss;
|
||||
calc_stats(topo_uptime_ns(), &qps, &total_recv, &total_loss);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "qps = %d, recv = %d, loss = %d\n",
|
||||
qps, total_recv, total_loss);
|
||||
|
||||
for (auto each : options.s_thr_info) {
|
||||
delete each->load_gen0;
|
||||
delete each->load_gen1;
|
||||
delete each->ia_gen;
|
||||
delete each;
|
||||
}
|
||||
|
||||
// clean up
|
||||
dpdk_cleanup(&dconf);
|
||||
|
||||
return 0;
|
||||
}
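The per-thread setup loop above walks the affinity set with CPU_FFS/CPU_CLR: the 1-based index of the lowest set bit picks the next lcore, a thread_info with matching rx/tx queue ids is created for it, and the bit is cleared before the next scan. A minimal Python sketch of the same bit-scan pattern (illustrative only, not part of the source):

# illustrative sketch of the CPU_FFS/CPU_CLR walk used in main() above
def walk_cpuset(mask: int):
    """Yield (tid, lcore_id) for every set bit in an affinity mask."""
    tid = 0
    while mask != 0:
        ffs = (mask & -mask).bit_length()   # 1-based index of lowest set bit, like CPU_FFS
        lcore_id = ffs - 1
        yield tid, lcore_id
        mask &= ~(1 << lcore_id)            # like CPU_CLR
        tid += 1

print(list(walk_cpuset(0b101010)))          # cores 1, 3, 5 -> [(0, 1), (1, 3), (2, 5)]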
|
544
rat/rat.cc
Normal file
@ -0,0 +1,544 @@
|
||||
#include <cstdio>
|
||||
#include <ctime>
|
||||
#include <netinet/in.h>
|
||||
#include <rte_config.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_ethdev.h>
|
||||
#include <rte_cycles.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_mbuf.h>
|
||||
#include <rte_ether.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_log.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_ip.h>
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "nm.h"
|
||||
#include "gen.h"
|
||||
#include "ntr.h"
|
||||
#include "pkt.h"
|
||||
#include "util.h"
|
||||
|
||||
constexpr static unsigned int MBUF_MAX_COUNT = 16384;
|
||||
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
|
||||
constexpr static unsigned int RX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int TX_RING_SIZE = 4096;
|
||||
constexpr static unsigned int BURST_SIZE = 32;
|
||||
|
||||
constexpr static unsigned int MODE_MASTER = 0;
|
||||
constexpr static unsigned int MODE_CLIENT = 1;
|
||||
|
||||
static const struct rte_eth_conf port_conf_default{};
|
||||
|
||||
struct datapt {
|
||||
uint32_t epoch;
|
||||
uint32_t valid;
|
||||
uint64_t clt_hw_tx;
|
||||
uint64_t clt_sw_tx;
|
||||
uint64_t clt_hw_rx;
|
||||
uint64_t clt_sw_rx;
|
||||
uint64_t srv_hw_tx;
|
||||
uint64_t srv_sw_tx;
|
||||
uint64_t srv_hw_rx;
|
||||
uint64_t srv_sw_rx;
|
||||
};
|
||||
|
||||
struct thread_info {
|
||||
unsigned int id;
|
||||
unsigned int rxqid{0};
|
||||
unsigned int txqid{0};
|
||||
std::vector<struct datapt *> data;
|
||||
struct datapt * last_datapt{nullptr};
|
||||
unsigned int tot_send{0};
|
||||
unsigned int tot_recv{0};
|
||||
Generator * ia_gen;
|
||||
};
|
||||
|
||||
struct options_t {
|
||||
unsigned int run_time{5};
|
||||
unsigned int warmup_time{0};
|
||||
unsigned int num_threads{1};
|
||||
unsigned int mode{MODE_MASTER};
|
||||
char output[256] = "output.txt";
|
||||
char ia_gen[256] = "fixed:1";
|
||||
struct rte_ether_addr server_mac;
|
||||
uint64_t cpu_mask;
|
||||
// states
|
||||
struct rte_mempool * mbuf_pool;
|
||||
struct rte_ether_addr s_host_mac;
|
||||
uint16_t s_portid;
|
||||
std::vector<struct thread_info *> s_thr_info;
|
||||
std::atomic<uint32_t> s_epoch;
|
||||
std::atomic<bool> s_stop {false};
|
||||
std::atomic<uint32_t> s_record {0};
|
||||
};
|
||||
|
||||
static struct options_t options;
|
||||
|
||||
// static struct thread_info * get_thread_info(int qid)
|
||||
// {
|
||||
// return options.s_thr_info.at(qid);
|
||||
// }
|
||||
|
||||
static int
|
||||
locore_main(void * tif)
|
||||
{
|
||||
struct thread_info * tinfo = (struct thread_info *)tif;
|
||||
struct rte_mbuf *tx_buf;
|
||||
struct rte_mbuf *rx_bufs[BURST_SIZE];
|
||||
struct pkt_hdr *pkt_data;
|
||||
uint32_t core_id = rte_lcore_id();
|
||||
int32_t ret;
|
||||
|
||||
bool read_tx = true;
|
||||
bool recv_stat = true;
|
||||
bool recv_resp = true;
|
||||
|
||||
uint64_t next_ts;
|
||||
// XXX: check link status instead
|
||||
|
||||
sleep(1);
|
||||
if (rte_eth_dev_socket_id(options.s_portid) > 0 && rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: WARNING, port %d is on remote NUMA node to "
|
||||
"polling thread.\n\tPerformance will "
|
||||
"not be optimal.\n", options.s_portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d running thread %d...\n", core_id, tinfo->id);
|
||||
|
||||
next_ts = get_time_us();
|
||||
|
||||
while(!options.s_stop.load()) {
|
||||
uint64_t now = get_time_us();
|
||||
// always pop incoming packets
|
||||
const uint16_t nb_rx = rte_eth_rx_burst(options.s_portid, 0, rx_bufs, BURST_SIZE);
|
||||
|
||||
if (nb_rx > 0) {
|
||||
for (int i = 0; i < nb_rx; i++) {
|
||||
struct pkt_hdr * each = check_valid_packet(rx_bufs[i]);
|
||||
|
||||
if (each == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: ignoring invalid packet %p.\n", (void*)rx_bufs[i]);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t type = rte_be_to_cpu_16(each->type);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: received packet %p type %d.\n", (void*)rx_bufs[i], type);
|
||||
switch (type) {
|
||||
struct pkt_payload_epoch * pld_epoch;
|
||||
struct pkt_payload_stat * pld_stat;
|
||||
uint32_t epoch;
|
||||
|
||||
case PKT_TYPE_PROBE_RESP:
|
||||
pld_epoch = (struct pkt_payload_epoch *)each->payload;
|
||||
epoch = rte_be_to_cpu_32(pld_epoch->epoch);
|
||||
|
||||
if (tinfo->last_datapt == nullptr || epoch != tinfo->last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, tinfo->last_datapt->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
tinfo->tot_recv++;
|
||||
|
||||
recv_resp = true;
|
||||
break;
|
||||
case PKT_TYPE_STAT:
|
||||
pld_stat = (struct pkt_payload_stat *)each->payload;
|
||||
epoch = rte_be_to_cpu_32(pld_stat->epoch);
|
||||
|
||||
if (tinfo->last_datapt == nullptr || epoch != tinfo->last_datapt->epoch) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: packet %p epoch %d doesn't match datapt %d.\n", (void*)rx_bufs[i], epoch, tinfo->last_datapt->epoch);
|
||||
break;
|
||||
}
|
||||
|
||||
tinfo->last_datapt->srv_hw_tx = rte_be_to_cpu_64(pld_stat->hw_tx);
|
||||
tinfo->last_datapt->srv_hw_rx = rte_be_to_cpu_64(pld_stat->hw_rx);
|
||||
tinfo->last_datapt->srv_sw_tx = rte_be_to_cpu_64(pld_stat->sw_tx);
|
||||
tinfo->last_datapt->srv_sw_rx = rte_be_to_cpu_64(pld_stat->sw_rx);
|
||||
|
||||
tinfo->tot_recv++;
|
||||
|
||||
recv_stat = true;
|
||||
break;
|
||||
default:
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "locore_main: ignoring packet %p with unknown type %d.\n", (void*)rx_bufs[i], type);
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(rx_bufs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (read_tx && recv_stat && recv_resp) {
|
||||
// if we have all the data
|
||||
|
||||
if (tinfo->last_datapt != nullptr) {
|
||||
// push the data to the queue if we haven't done so already
|
||||
tinfo->data.push_back(tinfo->last_datapt);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: datapt for epoch %d dump:\n" \
|
||||
" Valid: %d\n"
|
||||
" client TX HW: %llu\n" \
|
||||
" client TX SW: %llu\n" \
|
||||
" client RX HW: %llu\n" \
|
||||
" client RX SW: %llu\n" \
|
||||
" server TX HW: %llu\n" \
|
||||
" server TX SW: %llu\n" \
|
||||
" server RX HW: %llu\n" \
|
||||
" server RX SW: %llu\n\n",
|
||||
tinfo->last_datapt->epoch,
|
||||
tinfo->last_datapt->valid,
|
||||
tinfo->last_datapt->clt_hw_tx,
|
||||
tinfo->last_datapt->clt_sw_tx,
|
||||
tinfo->last_datapt->clt_hw_rx,
|
||||
tinfo->last_datapt->clt_sw_rx,
|
||||
tinfo->last_datapt->srv_hw_tx,
|
||||
tinfo->last_datapt->srv_sw_tx,
|
||||
tinfo->last_datapt->srv_hw_rx,
|
||||
tinfo->last_datapt->srv_sw_rx);
|
||||
tinfo->last_datapt = nullptr;
|
||||
}
|
||||
|
||||
if (now >= next_ts) {
|
||||
struct pkt_payload_epoch * pld_epoch;
|
||||
uint32_t epoch;
|
||||
|
||||
next_ts += (int)(tinfo->ia_gen->generate() * 1000000.0);
|
||||
|
||||
// generate the packet
|
||||
tx_buf = rte_pktmbuf_alloc(options.mbuf_pool);
|
||||
|
||||
if (tx_buf == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "cannot allocate tx_buf\n");
|
||||
}
|
||||
|
||||
pkt_data = construct_pkt_hdr(tx_buf, PKT_TYPE_PROBE,
|
||||
&options.s_host_mac, &options.server_mac);
|
||||
if (pkt_data == NULL) {
|
||||
rte_exit(EXIT_FAILURE, "cannot allocate space for packet_data in mbuf\n");
|
||||
}
|
||||
|
||||
epoch = options.s_epoch.fetch_add(1);
|
||||
pld_epoch = (struct pkt_payload_epoch *)pkt_data->payload;
|
||||
pld_epoch->epoch = rte_cpu_to_be_32(epoch);
|
||||
tinfo->last_datapt = new struct datapt;
|
||||
tinfo->last_datapt->epoch = epoch;
|
||||
tinfo->last_datapt->valid = options.s_record.load();
|
||||
|
||||
read_tx = false;
|
||||
recv_resp = false;
|
||||
recv_stat = false;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: sending packet %p with epoch %d\n", (void*)tx_buf, epoch);
|
||||
const uint16_t nb_tx = rte_eth_tx_burst(options.s_portid, tinfo->txqid, &tx_buf, 1);
|
||||
|
||||
if (nb_tx != 1) {
|
||||
rte_exit(EXIT_FAILURE, "failed to send packet 0x%p, epoch %d\n", (void*)tx_buf, epoch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!read_tx) {
|
||||
struct timespec ts;
|
||||
if ((ret = rte_eth_timesync_read_tx_timestamp(options.s_portid, &ts)) == 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "locore_main: read hw tx timestamp %lld.\n", ts.tv_nsec + ts.tv_sec * S2NS);
|
||||
tinfo->last_datapt->clt_hw_tx = ts.tv_nsec + ts.tv_sec * S2NS;
|
||||
read_tx = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rte_pktmbuf_free(tx_buf);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main: core %d successfully stopped.\n", core_id);
|
||||
|
||||
return 0;
|
||||
}
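The loop above keeps a single probe outstanding per thread: a PKT_TYPE_PROBE carrying an epoch goes out, and the next probe is only scheduled once the hardware TX timestamp has been read back and both the PKT_TYPE_PROBE_RESP and PKT_TYPE_STAT replies for that epoch have been seen. A rough Python sketch of one such round trip; the send/receive helpers are assumptions, not part of the source:

# illustrative sketch of one epoch round trip driven by read_tx/recv_resp/recv_stat above
def probe_round(epoch, send_probe, wait_reply, read_hw_tx_timestamp):
    datapt = {"epoch": epoch}
    send_probe(epoch)                          # PKT_TYPE_PROBE carrying the epoch
    read_tx = recv_resp = recv_stat = False
    while not (read_tx and recv_resp and recv_stat):
        kind, payload = wait_reply()
        if kind == "PROBE_RESP" and payload["epoch"] == epoch:
            recv_resp = True                   # server echoed our epoch
        elif kind == "STAT" and payload["epoch"] == epoch:
            datapt.update(payload)             # server-side hw/sw timestamps
            recv_stat = True
        if not read_tx:
            ts = read_hw_tx_timestamp()        # NIC TX timestamp; may not be ready yet
            if ts is not None:
                datapt["clt_hw_tx"] = ts
                read_tx = True
    return datapt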
|
||||
|
||||
static int
|
||||
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
|
||||
{
|
||||
struct rte_eth_dev_info dev_info;
|
||||
struct rte_eth_conf port_conf = port_conf_default;
|
||||
struct rte_eth_txconf txconf;
|
||||
struct rte_eth_rxconf rxconf;
|
||||
|
||||
uint16_t nb_rxd = RX_RING_SIZE;
|
||||
uint16_t nb_txd = TX_RING_SIZE;
|
||||
|
||||
|
||||
if(!rte_eth_dev_is_valid_port(portid)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = rte_eth_dev_info_get(portid, &dev_info);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
port_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MAX_LEN;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
|
||||
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
|
||||
|
||||
/* Configure the Ethernet device. */
|
||||
ret = rte_eth_dev_configure(portid, options.num_threads, options.num_threads, &port_conf);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Allocate and set up 1 RX queue per thread. */
|
||||
rxconf = dev_info.default_rxconf;
|
||||
rxconf.offloads = port_conf.rxmode.offloads;
|
||||
for (uint32_t i = 0; i < options.num_threads; i++) {
|
||||
ret = rte_eth_rx_queue_setup(portid, i, nb_rxd, rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
txconf = dev_info.default_txconf;
|
||||
txconf.offloads = port_conf.txmode.offloads;
|
||||
/* Allocate and set up 1 TX queue per thread. */
|
||||
for (uint32_t i = 0; i < options.num_threads; i++) {
|
||||
ret = rte_eth_tx_queue_setup(portid, i, nb_txd, rte_eth_dev_socket_id(portid), &txconf);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = rte_eth_dev_start(portid);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Display the port MAC address. */
|
||||
struct rte_ether_addr addr;
|
||||
ret = rte_eth_macaddr_get(portid, &addr);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
/* Enable RX in promiscuous mode for the Ethernet device. */
|
||||
ret = rte_eth_promiscuous_enable(portid);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dump_options()
|
||||
{
|
||||
fprintf(stdout, "Configuration:\n" \
|
||||
" run time = %d\n" \
|
||||
" warmup time = %d\n" \
|
||||
" output file = %s\n" \
|
||||
" server MAC = %x:%x:%x:%x:%x:%x\n",
|
||||
options.run_time,
|
||||
options.warmup_time,
|
||||
options.output,
|
||||
options.server_mac.addr_bytes[0],
|
||||
options.server_mac.addr_bytes[1],
|
||||
options.server_mac.addr_bytes[2],
|
||||
options.server_mac.addr_bytes[3],
|
||||
options.server_mac.addr_bytes[4],
|
||||
options.server_mac.addr_bytes[5]);
|
||||
}
|
||||
|
||||
static void usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n " \
|
||||
" -v(vv): verbose mode\n" \
|
||||
" -h: display the information\n" \
|
||||
" -o: output filename\n" \
|
||||
" -t: run time\n" \
|
||||
" -T: warmup time\n" \
|
||||
" -s: server's mac\n" \
|
||||
" -A: affinity mask\n" \
|
||||
" -a: number of threads\n" \
|
||||
" -C: client mode\n"
|
||||
" -i: inter-arrival time distribution\n\n");
|
||||
}
|
||||
// static void int_handler(int)
|
||||
// {
|
||||
// //rte_exit(EXIT_SUCCESS, "Caught SIGINT, exiting...\n");
|
||||
// }
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
unsigned int nb_ports;
|
||||
struct rte_mempool *mbuf_pool;
|
||||
std::ofstream log_file;
|
||||
struct thread_info *tinfo;
|
||||
|
||||
ntr_init();
|
||||
if (nm_init() != 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to init libnm\n");
|
||||
// signal(SIGINT, int_handler);
|
||||
|
||||
// init dpdk
|
||||
int ret = rte_eal_init(argc, argv);
|
||||
if (ret < 0) {
|
||||
rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
|
||||
}
|
||||
|
||||
argc -= ret;
|
||||
argv += ret;
|
||||
|
||||
// set warning level
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while((c = getopt(argc, argv, "hvo:t:T:s:A:a:Ci:")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 's':
|
||||
if (rte_ether_unformat_addr(optarg, &options.server_mac) == -1) {
|
||||
rte_exit(EXIT_FAILURE, "cannot parse %s as mac address.\n", optarg);
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
options.run_time = atoi(optarg);
|
||||
break;
|
||||
case 'T':
|
||||
options.warmup_time = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
rte_exit(EXIT_SUCCESS, "success\n");
|
||||
case 'o':
|
||||
strncpy(options.output, optarg, sizeof(options.output) - 1);
|
||||
break;
|
||||
case 'A':
|
||||
options.cpu_mask = atoll(optarg);
|
||||
break;
|
||||
case 'a':
|
||||
options.num_threads = atoi(optarg);
|
||||
break;
|
||||
case 'C':
|
||||
options.mode = MODE_CLIENT;
|
||||
break;
|
||||
case 'i':
|
||||
strncpy(options.ia_gen, optarg, sizeof(options.ia_gen) - 1);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
rte_exit(EXIT_FAILURE, "unknown argument: %c\n", c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// open log file for writing
|
||||
if (options.mode == MODE_MASTER) {
|
||||
log_file.open(options.output, std::ofstream::out);
|
||||
if (!log_file) {
|
||||
rte_exit(EXIT_FAILURE, "failed to open log file %s\n", options.output);
|
||||
}
|
||||
}
|
||||
|
||||
nb_ports = rte_eth_dev_count_avail();
|
||||
if (nb_ports == 0) {
|
||||
rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
|
||||
}
|
||||
|
||||
uint16_t portid = rte_eth_find_next(0);
|
||||
if (portid == RTE_MAX_ETHPORTS) {
|
||||
rte_exit(EXIT_FAILURE, "cannot find an available port\n");
|
||||
}
|
||||
options.s_portid = portid;
|
||||
|
||||
|
||||
// create a mbuf memory pool on the socket
|
||||
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(options.s_portid));
|
||||
if (mbuf_pool == nullptr) {
|
||||
rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
|
||||
}
|
||||
options.mbuf_pool = mbuf_pool;
|
||||
|
||||
for(int i = 0; i < 1; i++) {
|
||||
tinfo = new thread_info;
|
||||
tinfo->id = i;
|
||||
tinfo->ia_gen = createGenerator(options.ia_gen);
|
||||
options.s_thr_info.push_back(tinfo);
|
||||
}
|
||||
|
||||
if (port_init(portid, mbuf_pool) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
|
||||
}
|
||||
|
||||
if (rte_eth_macaddr_get(portid, &options.s_host_mac) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n", portid);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n", portid,
|
||||
options.s_host_mac.addr_bytes[0],
|
||||
options.s_host_mac.addr_bytes[1],
|
||||
options.s_host_mac.addr_bytes[2],
|
||||
options.s_host_mac.addr_bytes[3],
|
||||
options.s_host_mac.addr_bytes[4],
|
||||
options.s_host_mac.addr_bytes[5]);
|
||||
|
||||
dump_options();
|
||||
|
||||
sleep(1);
|
||||
|
||||
uint16_t core_id = rte_get_next_lcore(0, true, false);
|
||||
|
||||
if (rte_eal_remote_launch(locore_main, options.s_thr_info.at(0), core_id) != 0) {
|
||||
rte_exit(EXIT_FAILURE, "failed to launch function on locore\n");
|
||||
}
|
||||
|
||||
// poor man's timer
|
||||
// XXX: use kqueue instead
|
||||
struct timespec ts;
|
||||
ts.tv_sec = 1;
|
||||
ts.tv_nsec = 0;
|
||||
uint32_t second = 0;
|
||||
while(true) {
|
||||
if (second >= options.warmup_time) {
|
||||
options.s_record.store(1);
|
||||
}
|
||||
if (second >= options.run_time + options.warmup_time) {
|
||||
options.s_stop.store(true);
|
||||
break;
|
||||
}
|
||||
clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL);
|
||||
second++;
|
||||
}
|
||||
|
||||
if (rte_eal_wait_lcore(core_id) < 0)
|
||||
rte_exit(EXIT_FAILURE, "failed to wait for job completion\n");
|
||||
|
||||
// dump stats
|
||||
if (options.mode == MODE_MASTER) {
|
||||
thread_info * master_thrd = options.s_thr_info.at(0);
|
||||
for (auto it : master_thrd->data) {
|
||||
if (it->valid) {
|
||||
log_file << it->clt_sw_rx << ',' << it->clt_sw_tx << ','
|
||||
<< it->clt_hw_rx << ',' << it->clt_hw_tx << ','
|
||||
<< it->srv_sw_rx << ',' << it->srv_sw_tx << ','
|
||||
<< it->srv_hw_rx << ',' << it->srv_hw_tx << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
log_file.close();
|
||||
|
||||
// clean up
|
||||
rte_eth_dev_stop(portid);
|
||||
rte_eth_dev_close(portid);
|
||||
|
||||
return 0;
|
||||
}
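Each row of the master-mode dump above is eight comma-separated timestamps in the order client SW RX, client SW TX, client HW RX, client HW TX, server SW RX, server SW TX, server HW RX, server HW TX. A minimal sketch that recovers the four latency deltas from one row (the example values are made up):

# illustrative parse of one dumped row; column order follows the log_file << ... statements above
def parse_row(line: str):
    c_srx, c_stx, c_hrx, c_htx, s_srx, s_stx, s_hrx, s_htx = map(int, line.split(','))
    return {
        "clt_sw": c_srx - c_stx,   # client software receive - transmit
        "clt_hw": c_hrx - c_htx,   # client hardware receive - transmit
        "srv_sw": s_stx - s_srx,   # server software transmit - receive
        "srv_hw": s_htx - s_hrx,   # server hardware transmit - receive
    }

print(parse_row("1000,400,900,500,650,750,600,800"))
# {'clt_sw': 600, 'clt_hw': 400, 'srv_sw': 100, 'srv_hw': 200}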
|
@ -1,50 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import getopt
|
||||
import subprocess
|
||||
|
||||
options = getopt.getopt(sys.argv[1:], 'b:s:d:p:')[0]
|
||||
|
||||
base=0
|
||||
stride=2
|
||||
num = 0
|
||||
port = 0
|
||||
|
||||
for opt, arg in options:
|
||||
if opt == '-b':
|
||||
base = int(arg)
|
||||
elif opt == '-s':
|
||||
stride = int(arg)
|
||||
elif opt == '-d':
|
||||
num = int(arg)
|
||||
elif opt == '-p':
|
||||
port = int(arg)
|
||||
result = subprocess.run("sysctl -a", shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
lines = result.stdout.decode().split('\n')
|
||||
cclines : list[str] = []
|
||||
for line in lines:
|
||||
if ("irq" in line) and (f"t6nex{num}" in line) and (f"{port}a" in line):
|
||||
cclines.append(line)
|
||||
|
||||
if len(cclines) == 0:
|
||||
print(f"No t6nex {num}a lines from sysctl.\n")
|
||||
exit(1)
|
||||
|
||||
irqs = []
|
||||
for line in cclines:
|
||||
eles = line.split(' ')
|
||||
irq = eles[0]
|
||||
if (irq.startswith("irq") and irq.endswith(":")):
|
||||
irq = irq[3:-1]
|
||||
irqs.append(int(irq))
|
||||
else:
|
||||
print(f"Unknown line format: f{line}")
|
||||
|
||||
print(f"Detected {len(irqs)} irqs:\n{str(irqs)}")
|
||||
|
||||
for irq in irqs:
|
||||
print(f"Setting irq{irq}'s affinity to core {base}...")
|
||||
subprocess.run(f"cpuset -l {base} -x {irq}", check=True, shell=True)
|
||||
base = base + stride
|
||||
|
||||
exit(0)
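The filter above keys on sysctl -a interrupt lines that mention the t6nex adapter and the chosen port; the leading field of each match is expected to look like irqNNN:. A hedged example of the extraction on a single made-up line (the exact sysctl line layout is an assumption):

# illustrative only: a made-up interrupt line shaped like what the loop above expects
line = "irq317: t6nex0:0a0 1234567 140"
irq_field = line.split(' ')[0]              # "irq317:"
if irq_field.startswith("irq") and irq_field.endswith(":"):
    irq = int(irq_field[3:-1])              # 317
    print(f"cpuset -l 0 -x {irq}")          # one cpuset call per IRQ, base advancing by stride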
|
38
scripts/compile.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/bin/sh
|
||||
test_dir="/numam.d"
|
||||
root=".."
|
||||
servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca"
|
||||
rsync_flags="-vchr"
|
||||
ssh_args="-o StrictHostKeyChecking=no -p77"
|
||||
|
||||
user=$1
|
||||
|
||||
if [ -z "$user" ]
|
||||
then
|
||||
user=$(whoami)
|
||||
fi
|
||||
|
||||
echo "USER: $user"
|
||||
|
||||
compile() {
|
||||
# separate these functions because we might change kernel (reboot) without needing to recompile
|
||||
echo "====================$1===================="
|
||||
echo "Syncing directories..."
|
||||
ssh $(echo $ssh_args $user@$1) "sudo mkdir -p $test_dir"
|
||||
ssh $(echo $ssh_args $user@$1) "sudo chmod 777 $test_dir"
|
||||
rsync $(echo $rsync_flags) -e 'ssh -p 77' $root/ $user@$1:$test_dir/
|
||||
echo "Compiling..."
|
||||
ssh $(echo $ssh_args $user@$1) "mkdir -p $test_dir/build; cd $test_dir/build; cmake ../; make clean all -j8" &
|
||||
wait
|
||||
echo "$1 Done."
|
||||
echo ""
|
||||
}
|
||||
|
||||
i=0
|
||||
for server in $servers
|
||||
do
|
||||
i=$(expr $i + 1)
|
||||
compile "$server" &
|
||||
done
|
||||
|
||||
wait
|
@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
scp -P77 mount.sh oscar@icelake1-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount_small.sh oscar@icelake1-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount.sh oscar@milan1-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount_small.sh oscar@milan1-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount.sh oscar@icelake2-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount_small.sh oscar@icelake2-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount.sh oscar@milan2-int.rcs.uwaterloo.ca:~/
|
||||
scp -P77 mount_small.sh oscar@milan2-int.rcs.uwaterloo.ca:~/
|
230
scripts/dpdk.py
@ -1,230 +0,0 @@
|
||||
from cgi import test
|
||||
from site import abs_paths
|
||||
import subprocess as sp
|
||||
import time
|
||||
import select
|
||||
import os
|
||||
import datetime
|
||||
import pwd
|
||||
import sys
|
||||
import getopt
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
import libpar as par
|
||||
import libtc as tc
|
||||
import libmechspec as mechspec
|
||||
import netexp
|
||||
|
||||
only_max_qps = True
|
||||
# [[counter names], counting mode (0 = sampling, 1 = counting)]
|
||||
pmc_counters = [
|
||||
"",
|
||||
# [["mem_load_l3_miss_retired.local_dram"], 1],
|
||||
# [["mem_load_l3_miss_retired.remote_dram"], 1],
|
||||
# [["mem_load_l3_miss_retired.remote_hitm"], 1],
|
||||
# [["mem_load_l3_miss_retired.remote_fwd"], 1]
|
||||
# [["mem_trans_retired.load_latency_gt_8"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_16"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_32"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_64"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_128"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_256"], 0],
|
||||
# [["mem_trans_retired.load_latency_gt_512"], 0],
|
||||
#[["mem_trans_retired.load_latency_gt_8", ""], 0],
|
||||
]
|
||||
|
||||
# pkt_pad
|
||||
clt_pkt_pads = [
|
||||
0,
|
||||
# 256,
|
||||
# 512,
|
||||
# 1024,
|
||||
# 2048,
|
||||
# 4096,
|
||||
# 8192
|
||||
]
|
||||
|
||||
clt_pkt_pads_depth = {}
|
||||
clt_pkt_pads_depth[0] = 8
|
||||
clt_pkt_pads_depth[256] = 6
|
||||
clt_pkt_pads_depth[512] = 6
|
||||
clt_pkt_pads_depth[1024] = 4
|
||||
clt_pkt_pads_depth[1518] = 4
|
||||
clt_pkt_pads_depth[2048] = 2
|
||||
clt_pkt_pads_depth[4096] = 2
|
||||
clt_pkt_pads_depth[8192] = 1
|
||||
clt_pkt_pads_depth[9018] = 1
|
||||
|
||||
# clt_load
|
||||
clt_wrkld = [
|
||||
[0, "fixed:0", "fixed:0"],
|
||||
# [0, "uniform:1000", "fixed:0"],
|
||||
# [0, "uniform:100", "fixed:0"],
|
||||
# [0, "uniform:10", "fixed:0"],
|
||||
# [1, "uniform:480", "uniform:1024"],
|
||||
# [1, "uniform:480", "uniform:256"],
|
||||
# [1, "uniform:480", "uniform:64"]
|
||||
]
|
||||
|
||||
# paths
|
||||
file_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
root_dir = os.path.join(file_dir,"..")
|
||||
|
||||
# [srv_affinity, OPTIONAL( memgen_affinity, iteration, buffer_size, target_dom )]
|
||||
server_affinity = [
|
||||
["1,3,5,7,9,11,13,15,17,19,21,23"],
|
||||
["25,27,29,31,33,35,37,39,41,43,45,47"],
|
||||
#["1,3,5,7,9,11,13,15,17,19,21,23", "26,28,30,32,34,36,38,40,42,44,46", -1, 512*1024*1024, 0],
|
||||
#["25,27,29,31,33,35,37,39,41,43,45,47", "2,4,6,8,10,12,14,16,18,20,22", -1, 512*1024*1024, 1],
|
||||
|
||||
# "65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127",
|
||||
# "1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63",
|
||||
# "1,3,5,7,9,11,13,15",
|
||||
# "17,19,21,23,25,27,29,31",
|
||||
# "33,35,37,39,41,43,45,47",
|
||||
# "49,51,53,55,57,59,61,63"
|
||||
]
|
||||
|
||||
def flush_netresult(conf : netexp.NetExpConf, result : netexp.NetExpResult):
|
||||
sample_out = tc.get_odir() + "/" + str(result.parser.qps) + ".txt"
|
||||
|
||||
with open(sample_out, "w") as f:
|
||||
f.write(result.sample)
|
||||
|
||||
if conf.enable_pmc:
|
||||
pmc_out = tc.get_odir() + "/" + str(result.parser.qps) + ".pmc"
|
||||
if conf.pmc_mode != 0:
|
||||
with open(pmc_out, "w") as f:
|
||||
f.write(result.pmc_parser.raw)
|
||||
else:
|
||||
with open(pmc_out, "wb") as f:
|
||||
f.write(result.pmc_parser[0])
|
||||
with open(pmc_out + "_parsed", "w") as g:
|
||||
g.write(result.pmc_parser[1])
|
||||
|
||||
tc.log_print("=== Summary - qps: " + str(result.parser.qps) + " master loss: " + str(float(result.parser.master_loss) / float(result.parser.master_recv + result.parser.master_loss) * 100.00) + "% slave loss: " + str(float(result.parser.slave_loss) / float(result.parser.slave_recv + result.parser.slave_loss) * 100.0) + "%" )
|
||||
tc.log_print("=== Server HW:")
|
||||
tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_hwlat, [result.parser.qps]) + "\n")
|
||||
tc.log_print("=== Server SW:")
|
||||
tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_swlat, [result.parser.qps]) + "\n")
|
||||
tc.log_print("=== Client HW:")
|
||||
tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_hwlat, [result.parser.qps]) + "\n")
|
||||
tc.log_print("=== Client SW:")
|
||||
tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_swlat, [result.parser.qps]) + "\n")
|
||||
if conf.enable_pmc:
|
||||
if conf.pmc_mode != 0:
|
||||
tc.log_print("=== PMC:")
|
||||
tc.log_print("counter: " + result.pmc_parser.counter + " count: " + str(result.pmc_parser.count) + " cores: " + str(result.pmc_parser.cores))
|
||||
|
||||
def main():
|
||||
tc.set_ssh_param("-o StrictHostKeyChecking=no -p77")
|
||||
tc.set_ssh_user("oscar")
|
||||
output_dirname = "run"
|
||||
|
||||
conf = netexp.NetExpConf()
|
||||
conf.srv_mechspec = mechspec.LAB.SKYLAKE1_10G
|
||||
conf.clt_mechspecs = [mechspec.LAB.SKYLAKE3_10G, mechspec.LAB.SKYLAKE5_10G]
|
||||
conf.mst_mechspec = mechspec.LAB.SKYLAKE2_10G
|
||||
conf.finalize_mechspecs()
|
||||
conf.root_dir = "/numam.d/build/bin"
|
||||
|
||||
# server fixed configs
|
||||
conf.srv_port = 0
|
||||
|
||||
# client fixed configs
|
||||
conf.clt_ia = "exponential"
|
||||
conf.clt_affinity = "1,3,5,7,9,11,13,15,17,19,21,23"
|
||||
conf.clt_port = 0
|
||||
conf.clt_pkt_loss_lat = 5000
|
||||
conf.clt_rage_quit_lat = 5000
|
||||
|
||||
# master fixed configs
|
||||
conf.mst_port = 0
|
||||
conf.mst_warmup = 5
|
||||
conf.mst_duration = 20
|
||||
conf.mst_qps = 100
|
||||
conf.mst_ia = "exponential"
|
||||
conf.mst_pkt_loss_lat = 5000
|
||||
conf.mst_pkt_loss_max = 100
|
||||
conf.mst_affinity = "2"
|
||||
|
||||
# pmc stuff
|
||||
conf.pmc_sampling_rate = 4096
|
||||
conf.pmc_counting_interval = 0.1
|
||||
|
||||
options = getopt.getopt(sys.argv[1:], 'scSD')[0]
|
||||
for opt, arg in options:
|
||||
if opt in ('-s'):
|
||||
netexp.stop_all(conf)
|
||||
return
|
||||
elif opt in ('-c'):
|
||||
conf.enable_client_only=True
|
||||
elif opt in ('-S'):
|
||||
netexp.setup(conf, bench = True, dpdk = False)
|
||||
return
|
||||
elif opt in ('-D'):
|
||||
netexp.setup(conf, bench=False, dpdk=True)
|
||||
return
|
||||
|
||||
tc.init("~/results.d/numam_neo/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
|
||||
cpcmd = "cp " + __file__ + " " + tc.get_odir() + "/"
|
||||
tc.log_print(cpcmd)
|
||||
sp.check_call(cpcmd, shell=True)
|
||||
|
||||
for eaff in server_affinity:
|
||||
conf.srv_affinity = eaff[0]
|
||||
conf.enable_memgen = False
|
||||
if len(eaff) > 1:
|
||||
conf.enable_memgen = True
|
||||
conf.memgen_affinity = eaff[1]
|
||||
conf.memgen_iteration = eaff[2]
|
||||
conf.memgen_size = eaff[3]
|
||||
conf.memgen_tgtdom = eaff[4]
|
||||
for epad in clt_pkt_pads:
|
||||
conf.clt_pkt_pad = 0
|
||||
conf.clt_pkt_depth = clt_pkt_pads_depth[conf.clt_pkt_pad]
|
||||
for eload in clt_wrkld:
|
||||
conf.clt_wrkld = eload[0]
|
||||
conf.clt_wrkarg0 = eload[1]
|
||||
conf.clt_wrkarg1 = eload[2]
|
||||
for epmc in pmc_counters:
|
||||
conf.enable_pmc = False
|
||||
if len(epmc) > 0:
|
||||
conf.enable_pmc = True
|
||||
conf.pmc_counters = epmc[0]
|
||||
conf.pmc_mode = epmc[1]
|
||||
|
||||
test_name = "affinity" + eaff[0] + "_pad" + str(epad) + "_load" + str(eload[0]) + "," + str(eload[1]) + "," + str(eload[2])
|
||||
if (conf.enable_memgen):
|
||||
test_name += "_memload" + str(eaff[1]) + "," + str(eaff[2]) + "," + str(eaff[3]) + "," + str(eaff[4])
|
||||
if (conf.enable_pmc):
|
||||
test_name += "_pmc" + str(epmc[1]) + "_" + conf.get_pmc_str()
|
||||
tc.begin(test_name)
|
||||
|
||||
conf.clt_qps = 0
|
||||
tc.log_print("============ " + test_name + " QPS: MAX ============")
|
||||
result : netexp.NetExpResult = netexp.run(conf)
|
||||
flush_netresult(conf, result)
|
||||
max_qps = result.parser.qps
|
||||
|
||||
if conf.enable_client_only:
|
||||
return
|
||||
|
||||
if only_max_qps:
|
||||
continue
|
||||
|
||||
finish = (int)(max_qps - max(conf.mst_qps, 0.01 * max_qps))
|
||||
step = (int)(finish / 10)
|
||||
cur_qps = step
|
||||
while cur_qps <= finish:
|
||||
tc.log_print("============ " + test_name + " QPS: " + str(cur_qps) + " ============")
|
||||
conf.clt_qps = cur_qps
|
||||
result : netexp.NetExpResult = netexp.run(conf)
|
||||
flush_netresult(conf, result)
|
||||
cur_qps += step
|
||||
tc.log_print("")
|
||||
tc.end()
|
||||
|
||||
netexp.stop_all(conf)
|
||||
main()
|
132
scripts/graph.py
@ -1,132 +0,0 @@
|
||||
#!/usr/bin/env python3.6
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib import ticker
|
||||
import numpy as np
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import libpar as par
|
||||
import getopt
|
||||
import math
|
||||
import concurrent.futures as CF
|
||||
|
||||
def process_dir(rootdir):
|
||||
ret = []
|
||||
print("Processing directory " + rootdir + " ...")
|
||||
for subdir in os.listdir(rootdir):
|
||||
each_dir = os.path.join(rootdir, subdir)
|
||||
if os.path.isfile(each_dir) and each_dir.endswith(".txt"):
|
||||
output = None
|
||||
try:
|
||||
with open(each_dir, 'r') as f:
|
||||
if len(f.readlines()) <= 1:
|
||||
print("Skipping empty file - " + each_dir)
|
||||
continue
|
||||
|
||||
with open(each_dir, 'r') as f:
|
||||
output = f.read()
|
||||
parser = par.khat_parser()
|
||||
parser.parse(output)
|
||||
print("Processed raw data - " + each_dir)
|
||||
ret.append(parser)
|
||||
except Exception:
|
||||
print("Unrecognized format - " + subdir)
|
||||
|
||||
print("")
|
||||
return ret
|
||||
|
||||
|
||||
marker_map = ["o", "P", "s", "v", "*", "+", "^", "1", "2", "d", "X", "o", "P", "s", "v", "*", "+", "^", "1", "2", "d", "X"]
|
||||
color_map = ["xkcd:black", "xkcd:red", "xkcd:blue", "xkcd:green", "xkcd:cyan", "xkcd:purple", "xkcd:orange", "xkcd:salmon", "xkcd:lightgreen", "xkcd:indigo", "xkcd:brown", "xkcd:bubblegum", "xkcd:lavender", "xkcd:maroon", "xkcd:fern", "xkcd:sky", "xkcd:orchid", "xkcd:sienna"]
|
||||
parser_idx_labels = ["srv_hw", "srv_sw", "clt_hw", "clt_sw"]
|
||||
|
||||
def add_curve(eax, label: str, qps_arr: list, lat_arr: list, marker: str, color: str):
|
||||
df_dict = {}
|
||||
df_dict['qps'] = qps_arr
|
||||
df_dict['lat'] = lat_arr
|
||||
|
||||
df = pd.DataFrame(df_dict)
|
||||
df = df.sort_values('qps')
|
||||
eax.plot('qps', 'lat', data = df, label=label, marker=marker, color=color, markersize=8)
|
||||
|
||||
# adds curves (avg and 99th percentile) for a specific parser idx
|
||||
def add_curves(rax, label: str, parsers: list, parser_idx: int, marker: str, color: str):
|
||||
qps_arr = []
|
||||
avg_arr = []
|
||||
p99_arr = []
|
||||
|
||||
for parser in parsers:
|
||||
qps_arr.append(parser.qps)
|
||||
each_lat_arr = []
|
||||
each_lat_arr.extend(parser.get_stat_arr(parser_idx))
|
||||
avg_arr.append(np.mean(each_lat_arr))
|
||||
p99_arr.append(np.percentile(each_lat_arr, 99))
|
||||
|
||||
add_curve(rax[0], label, qps_arr, avg_arr, marker, color)
|
||||
add_curve(rax[1], label, qps_arr, p99_arr, marker, color)
|
||||
|
||||
|
||||
# generate the graphs for a parser index
|
||||
def generate_graph(aff_to_parser: dict, parser_idx: int, fn: str):
|
||||
marker_idx = 0
|
||||
color_idx = 0
|
||||
|
||||
fig, rax = plt.subplots(2, 1)
|
||||
rax[0].set_yscale("log")
|
||||
rax[0].set_title("Average")
|
||||
rax[0].set_xlabel("QPS")
|
||||
rax[0].set_ylabel("Latency (ns)")
|
||||
rax[0].xaxis.get_major_formatter().set_scientific(False)
|
||||
rax[0].yaxis.set_minor_formatter(ticker.ScalarFormatter())
|
||||
rax[1].set_yscale("log")
|
||||
rax[1].set_title("99th percentile")
|
||||
rax[1].set_xlabel("QPS")
|
||||
rax[1].set_ylabel("Latency (ns)")
|
||||
rax[1].xaxis.get_major_formatter().set_scientific(False)
|
||||
rax[1].yaxis.set_minor_formatter(ticker.ScalarFormatter())
|
||||
|
||||
print("Generating graph => " + fn + "...")
|
||||
for aff in aff_to_parser:
|
||||
# each affinity gets a different marker type
|
||||
marker_type = marker_map[marker_idx]
|
||||
color_type = color_map[color_idx]
|
||||
marker_idx += 1
|
||||
color_idx += 1
|
||||
|
||||
print(" Processing affinity " + aff + "...")
|
||||
|
||||
add_curves(rax, aff, aff_to_parser[aff], parser_idx, marker_type, color_type)
|
||||
|
||||
rax[0].legend()
|
||||
rax[1].legend()
|
||||
fig.set_size_inches(23.4, 16.5)
|
||||
plt.savefig(fn, dpi=150)
|
||||
plt.close()
|
||||
|
||||
def main():
|
||||
datdir = None
|
||||
options = getopt.getopt(sys.argv[1:], 'd:')[0]
|
||||
|
||||
for opt, arg in options:
|
||||
if opt in ('-d'):
|
||||
datdir = arg
|
||||
|
||||
if datdir == None:
|
||||
raise Exception("Must specify -d parameter")
|
||||
|
||||
dat = {}
|
||||
|
||||
for subdir in os.listdir(datdir):
|
||||
each_dir = os.path.join(datdir, subdir)
|
||||
if not os.path.isfile(each_dir):
|
||||
dat[subdir] = process_dir(each_dir)
|
||||
|
||||
for i in range(len(parser_idx_labels)):
|
||||
generate_graph(dat, i, datdir + "/" + parser_idx_labels[i])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -12,7 +12,7 @@ import math
|
||||
import concurrent.futures as CF
|
||||
import libpar as par
|
||||
|
||||
num_bins = 250
|
||||
num_bins = 100
|
||||
extra_pct = []
|
||||
|
||||
def saveplot(fp : str, data : [], title : str):
|
||||
@ -20,6 +20,7 @@ def saveplot(fp : str, data : [], title : str):
|
||||
plt.xlabel("Delay")
|
||||
plt.title(title)
|
||||
plt.ylabel("Frequency")
|
||||
plt.title(os.path.basename(fp))
|
||||
f = plt.gcf()
|
||||
f.set_size_inches(11.69, 8.27)
|
||||
f.savefig(fp + "_" + title + "_" + ".png", dpi=160)
|
||||
@ -28,15 +29,6 @@ def saveplot(fp : str, data : [], title : str):
|
||||
|
||||
executor = CF.ProcessPoolExecutor(max_workers=int(os.cpu_count()))
|
||||
|
||||
def clean_data(dat: []):
|
||||
ret = []
|
||||
arr = np.array(dat)
|
||||
cutoff = np.percentile(arr, 99)
|
||||
for i in arr:
|
||||
if i <= cutoff:
|
||||
ret.append(i)
|
||||
return ret
|
||||
|
||||
def process_file(each_dir):
|
||||
try:
|
||||
print("Processing " + each_dir + " ...")
|
||||
@ -53,28 +45,12 @@ def process_file(each_dir):
|
||||
ss.append(pt.s_stx - pt.s_srx)
|
||||
ch.append(pt.c_hrx - pt.c_htx)
|
||||
cs.append(pt.c_srx - pt.c_stx)
|
||||
|
||||
sh = clean_data(sh)
|
||||
ss = clean_data(ss)
|
||||
ch = clean_data(ch)
|
||||
cs = clean_data(cs)
|
||||
|
||||
saveplot(each_dir, sh, "server_hw_delay")
|
||||
saveplot(each_dir, ss, "server_sw_delay")
|
||||
saveplot(each_dir, ch, "client_hw_delay")
|
||||
saveplot(each_dir, cs, "client_sw_delay")
|
||||
|
||||
# output median, etc.
|
||||
with open(each_dir + "_" + "stats.txt", 'w') as f:
|
||||
f.write("===================== SERVER HW ====================\n")
|
||||
f.write(par.mutilate_data.build_mut_output(sh, [len(sh)]))
|
||||
f.write("\n===================== SERVER SW ====================\n")
|
||||
f.write(par.mutilate_data.build_mut_output(ss, [len(ss)]))
|
||||
f.write("\n===================== CLIENT HW ====================\n")
|
||||
f.write(par.mutilate_data.build_mut_output(ch, [len(ch)]))
|
||||
f.write("\n===================== CLIENT SW ====================\n")
|
||||
f.write(par.mutilate_data.build_mut_output(cs, [len(cs)]))
|
||||
|
||||
except Exception:
|
||||
print("Unexpected error:", sys.exc_info())
|
||||
|
||||
@ -82,7 +58,8 @@ def process_dir(rootdir):
|
||||
for subdir in os.listdir(rootdir):
|
||||
each_dir = os.path.join(rootdir, subdir)
|
||||
if os.path.isfile(each_dir):
|
||||
if each_dir.endswith(".txt") or each_dir.endswith(".sample"):
|
||||
if each_dir.endswith("sample.txt") or each_dir.endswith(".sample"):
|
||||
#executor.submit(process_file, each_dir)
|
||||
process_file(each_dir)
|
||||
else:
|
||||
process_dir(each_dir)
|
||||
@ -96,7 +73,8 @@ def main():
|
||||
datdir = arg
|
||||
|
||||
if datdir == None:
|
||||
raise Exception("Must specify -d parameter")
|
||||
datdir = "/home/oscar/projs/kqsched/scripts/pingpong/results.d/sample"
|
||||
#raise Exception("Must specify -d parameter")
|
||||
|
||||
process_dir(datdir)
|
||||
executor.shutdown()
|
||||
|
@ -1,25 +0,0 @@
|
||||
|
||||
class NetSpec:
|
||||
def __init__(self, fqdn, ip, mac) -> None:
|
||||
self.mac = mac
|
||||
self.ip = ip
|
||||
self.fqdn = fqdn
|
||||
self.netspec = ip + "@" + mac
|
||||
|
||||
|
||||
class LabNetSpecs:
|
||||
def __init__(self) -> None:
|
||||
self.SKYLAKE1_10G = NetSpec(fqdn = "skylake1.rcs.uwaterloo.ca",ip = "192.168.123.11", mac = "3c:15:fb:62:9b:28")
|
||||
self.SKYLAKE2_10G = NetSpec(fqdn = "skylake2.rcs.uwaterloo.ca",ip = "192.168.123.12", mac = "3c:15:fb:c9:f3:36")
|
||||
self.SKYLAKE3_10G = NetSpec(fqdn = "skylake3.rcs.uwaterloo.ca",ip = "192.168.123.13", mac = "3c:15:fb:c9:f3:4b")
|
||||
self.SKYLAKE4_10G = NetSpec(fqdn = "skylake4.rcs.uwaterloo.ca",ip = "192.168.123.14", mac = "")
|
||||
self.SKYLAKE5_10G = NetSpec(fqdn = "skylake5.rcs.uwaterloo.ca",ip = "192.168.123.15", mac = "3c:15:fb:c9:f3:28")
|
||||
self.SKYLAKE6_10G = NetSpec(fqdn = "skylake6.rcs.uwaterloo.ca",ip = "192.168.123.16", mac = "3c:15:fb:62:9b:2f")
|
||||
self.SKYLAKE7_10G = NetSpec(fqdn = "skylake7.rcs.uwaterloo.ca",ip = "192.168.123.17", mac = "3c:15:fb:c9:f3:44")
|
||||
self.SKYLAKE8_10G = NetSpec(fqdn = "skylake8.rcs.uwaterloo.ca",ip = "192.168.123.18", mac = "3c:15:fb:62:9c:be")
|
||||
self.MILAN1_100G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "")
|
||||
self.MILAN1_10G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "a0:42:3f:4d:cb:bc")
|
||||
self.ICELAKE2_100G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "")
|
||||
self.ICELAKE2_10G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "")
|
||||
|
||||
LAB = LabNetSpecs()
|
@ -1,56 +1,6 @@
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
class iperf_json_parser:
|
||||
def __init__(self, inputs):
|
||||
self.aggregate_egress_bps = 0
|
||||
self.jsonobjs = []
|
||||
for input in inputs:
|
||||
jsobj = json.loads(input)
|
||||
self.jsonobjs.append(jsobj)
|
||||
each_bps = jsobj['end']['sum_sent']['bits_per_second']
|
||||
self.aggregate_egress_bps += each_bps
|
||||
|
||||
class memloadgen_parser:
|
||||
def __init__(self, input, min, max):
|
||||
lines = input.split('\n')
|
||||
if max > len(lines):
|
||||
max = len(lines)
|
||||
if len(lines) <= min:
|
||||
raise Exception("Not enough lines!")
|
||||
if min > max:
|
||||
min = max
|
||||
arr = []
|
||||
for i in range(min, max):
|
||||
arr.append(int(lines[i]))
|
||||
self.bps = np.mean(arr)
|
||||
|
||||
|
||||
class pmc_parser:
|
||||
def __init__(self, input):
|
||||
self.raw = input
|
||||
lines = input.split('\n')
|
||||
if len(lines) < 2:
|
||||
raise Exception("Invalid pmc file format")
|
||||
|
||||
spec = lines[0].strip()
|
||||
if (spec[0] != '#'):
|
||||
raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"")
|
||||
spec = spec.split(' ')
|
||||
self.cores = len(spec) - 1
|
||||
elements = spec[1].split('/')
|
||||
if (len(elements) != 3):
|
||||
raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"")
|
||||
self.counter = elements[2].strip()
|
||||
|
||||
last_line = lines[-1]
|
||||
elements = last_line.split(' ')
|
||||
total = 0
|
||||
for e in elements:
|
||||
if (len(e) > 0):
|
||||
total += int(e)
|
||||
self.count = total
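pmc_parser above expects a counting-mode pmcstat dump: a '#'-prefixed header whose space-separated columns each look like mode/cpu/counter, followed by sample rows, with the per-core counts on the last row summed. A made-up input that satisfies those checks (the exact pmcstat column layout is an assumption):

# illustrative input only; the field layout is inferred from the checks in pmc_parser.__init__
raw = ("# s/00/mem_load_l3_miss_retired.local_dram s/01/mem_load_l3_miss_retired.local_dram\n"
       "  1200 3400")
p = pmc_parser(raw)
print(p.cores, p.counter, p.count)   # 2 mem_load_l3_miss_retired.local_dram 4600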
|
||||
|
||||
class khat_parser:
|
||||
class pt:
|
||||
def __init__(self):
|
||||
@ -62,42 +12,13 @@ class khat_parser:
|
||||
self.c_hrx = 0
|
||||
self.c_stx = 0
|
||||
self.c_srx = 0
|
||||
self.master_total = 0
|
||||
self.master_loss = 0
|
||||
self.slave_total = 0
|
||||
self.slave_loss = 0
|
||||
self.qps = 0
|
||||
|
||||
def __init__(self):
|
||||
self.datapt = []
|
||||
self.srv_hwlat = []
|
||||
self.srv_swlat = []
|
||||
self.clt_hwlat = []
|
||||
self.clt_swlat = []
|
||||
self.lat_idx_arr = []
|
||||
self.lat_idx_arr.append(self.srv_hwlat)
|
||||
self.lat_idx_arr.append(self.srv_swlat)
|
||||
self.lat_idx_arr.append(self.clt_hwlat)
|
||||
self.lat_idx_arr.append(self.clt_swlat)
|
||||
|
||||
def get_stat_arr(self, idx : int):
|
||||
return self.lat_idx_arr[idx]
|
||||
|
||||
|
||||
def parse(self, output : str):
|
||||
first = True
|
||||
for line in output.splitlines():
|
||||
# the first line is qps
|
||||
cells = line.split(',')
|
||||
if (first):
|
||||
if len(cells) != 5:
|
||||
raise Exception("Invalid headline:" + line)
|
||||
self.qps = int(cells[0])
|
||||
self.master_recv = int(cells[1])
|
||||
self.master_loss = int(cells[2])
|
||||
self.slave_recv = int(cells[3])
|
||||
self.slave_loss = int(cells[4])
|
||||
first = False
|
||||
continue
|
||||
if len(cells) != 8:
|
||||
raise Exception("Invalid line:" + line)
|
||||
pt = self.pt()
|
||||
@ -110,10 +31,6 @@ class khat_parser:
|
||||
pt.s_hrx = int(cells[6])
|
||||
pt.s_htx = int(cells[7])
|
||||
self.datapt.append(pt)
|
||||
self.srv_hwlat.append(pt.s_htx - pt.s_hrx)
|
||||
self.srv_swlat.append(pt.s_stx - pt.s_srx)
|
||||
self.clt_hwlat.append(pt.c_hrx - pt.c_htx)
|
||||
self.clt_swlat.append(pt.c_srx - pt.c_stx)
|
||||
|
||||
|
||||
class mutilate_data:
|
||||
|
@ -23,7 +23,7 @@ tc_test_id = 0
|
||||
|
||||
def init(odir = "./results.d/"):
|
||||
global tc_output_dir
|
||||
tc_output_dir = odir
|
||||
tc_output_dir = odir + "_" + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|
||||
tc_output_dir = os.path.expanduser(tc_output_dir)
|
||||
os.system("mkdir -p " + tc_output_dir)
|
||||
global tc_logfile
|
||||
@ -40,7 +40,7 @@ def begin(name):
|
||||
def end():
|
||||
global tc_cur_test
|
||||
log_print("\n===== Test #" + str(tc_test_id) + " - " + tc_cur_test + " completed =====")
|
||||
tc_cur_test = ""
|
||||
tc_cur_test = None
|
||||
|
||||
def get_odir():
|
||||
return tc_output_dir + "/" + tc_cur_test
|
||||
@ -65,20 +65,12 @@ def set_ssh_param(para):
|
||||
global ssh_param
|
||||
ssh_param = para
|
||||
|
||||
def get_ssh_param():
|
||||
global ssh_param
|
||||
return ssh_param
|
||||
|
||||
ssh_user = None
|
||||
def set_ssh_user(user):
|
||||
global ssh_user
|
||||
ssh_user = user
|
||||
|
||||
def get_ssh_user():
|
||||
global ssh_user
|
||||
return ssh_user
|
||||
|
||||
def remote_exec(srv : list[str], cmd : str, blocking=True, check=True) -> sp.Popen:
|
||||
def remote_exec(srv, cmd, blocking=True, check=True):
|
||||
sub = []
|
||||
for s in srv:
|
||||
p = sp.Popen(["ssh " + ssh_param + " " + ((ssh_user + "@") if ssh_user != None else "") + s + " \"" + cmd +"\""], shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
|
||||
@ -93,27 +85,28 @@ def remote_exec(srv : list[str], cmd : str, blocking=True, check=True) -> sp.Pop
|
||||
return sub
|
||||
|
||||
|
||||
def check_stderr(p, sel, exclude = []):# -> tuple[bool, list[str]]:
|
||||
max_stderr_rd = 10
|
||||
err = []
|
||||
while sel.poll(1) and max_stderr_rd > 0:
|
||||
err.append(p.stderr.readline().decode().strip())
|
||||
max_stderr_rd = max_stderr_rd - 1
|
||||
def scan_stderr(p, exclude = None):
|
||||
for err in p.stderr:
|
||||
fail = True
|
||||
err = err.decode()
|
||||
err = err.strip()
|
||||
|
||||
# print(err)
|
||||
|
||||
good = True
|
||||
for e in err:
|
||||
e = e.strip()
|
||||
if len(e) == 0:
|
||||
if len(err) == 0:
|
||||
continue
|
||||
|
||||
good = False
|
||||
for exc in exclude:
|
||||
if exc in e:
|
||||
good = True
|
||||
break
|
||||
if exclude != None:
|
||||
for exc in exclude:
|
||||
if (exc != None) and (re.match(exc, err) != None):
|
||||
fail = False
|
||||
break
|
||||
|
||||
if fail:
|
||||
log_print("Error detected: " + err)
|
||||
return False
|
||||
|
||||
return good, err
|
||||
return True
|
||||
|
||||
# stderr threads
|
||||
errthr_objs = []
|
||||
@ -123,22 +116,15 @@ errthr_failed = False
|
||||
def errthr_get_failed():
|
||||
return errthr_failed
|
||||
|
||||
def thr_check_stderr(p : sp.Popen, name: str, exclude):
|
||||
def thr_check_stderr(p : sp.Popen, exclude):
|
||||
# print("thread start!")
|
||||
global errthr_failed
|
||||
sel = select.poll()
|
||||
sel.register(p.stderr, select.POLLIN)
|
||||
local_failed = False
|
||||
while(not errthr_sigstop):
|
||||
if (not local_failed):
|
||||
status, err = check_stderr(p, sel, exclude=exclude)
|
||||
if not status:
|
||||
errthr_failed = True
|
||||
local_failed = True
|
||||
log_print("Error detected in \"" + name + "\":")
|
||||
for e in err:
|
||||
log_print(" \"" + e + "\"")
|
||||
log_print("")
|
||||
time.sleep(random.uniform(0.001, 0.1))
|
||||
if not scan_stderr(p, exclude=exclude):
|
||||
errthr_failed = True
|
||||
# print("running!")
|
||||
time.sleep(0.5 + random.uniform(-0.1, 0.1))
|
||||
# print("thread exit!")
|
||||
|
||||
def errthr_start():
|
||||
global errthr_sigstop
|
||||
@ -146,18 +132,18 @@ def errthr_start():
|
||||
errthr_sigstop = False
|
||||
errthr_failed = False
|
||||
for thr in errthr_objs:
|
||||
thr.daemon = True
|
||||
thr.start()
|
||||
|
||||
def errthr_create(cp, name, exclude = None):
|
||||
def errthr_create(cp, exclude = None):
|
||||
global errthr_objs
|
||||
for i in range(len(cp)):
|
||||
errthr_objs.append(Thread(target = thr_check_stderr, args=(cp[i], name[i], exclude)))
|
||||
for p in cp:
|
||||
errthr_objs.append(Thread(target = thr_check_stderr, args=(p, exclude)))
|
||||
|
||||
def errthr_stop():
|
||||
global errthr_objs
|
||||
global errthr_sigstop
|
||||
errthr_sigstop = True
|
||||
# print("waiting!")
|
||||
for thr in errthr_objs:
|
||||
thr.join()
|
||||
errthr_objs.clear()
|
||||
|
@ -1,340 +0,0 @@
import time
import subprocess as sp
import os

import libpar as par
import libtc as tc
import libmechspec as mechspec


class NetExpResult:
    def __init__(self):
        self.parser = None
        self.pmc_parser = None
        self.sample = None


class NetExpConf:
    def __init__(self):
        self.root_dir = ""

        self.enable_client_only = False
        self.enable_memgen = False

        self.memgen_affinity = ""
        self.memgen_iteration = -1
        self.memgen_size = 512 * 1024 * 1024
        self.memgen_tgtdom = 1

        self.srv_affinity = ""
        self.srv_mechspec = None
        self.srv_port = 0

        self.clt_qps = 0
        self.clt_mechspecs = []
        self.clt_affinity = "1"
        self.clt_wrkld = 0
        self.clt_wrkarg0 = "fixed:0"
        self.clt_wrkarg1 = "fixed:0"
        self.clt_pkt_loss_lat = 1000
        self.clt_rage_quit_lat = 1000
        self.clt_port = 0
        self.clt_pkt_pad = 0
        self.clt_pkt_depth = 1
        self.clt_ia = "exponential"

        self.mst_mechspec = None
        self.mst_affinity = "2"
        self.mst_qps = 100
        self.mst_port = 0
        self.mst_pkt_loss_lat = 1000
        self.mst_pkt_loss_max = 1000
        self.mst_duration = 10
        self.mst_warmup = 5
        self.mst_ia = "exponential"

        self.enable_pmc = False
        self.pmc_counters = []
        self.pmc_mode = 0  # 0 = sampling
        self.pmc_sampling_rate = 8192
        self.pmc_counting_interval = 0.1

    def __build_fqdn_arr(self, ns):
        ret = []
        for n in ns:
            if n != None:
                ret.append(n.fqdn)
        return ret

    def get_pmc_str(self):
        ret = ""
        for counter in self.pmc_counters:
            ret = ret + counter + ","
        return ret[:-1]

    def calc_client_qps(self):
        return 0 if self.clt_qps == 0 else (int)((self.clt_qps - self.mst_qps) / len(self.clt_mechspecs))

    def finalize_mechspecs(self):
        self.clt_fqdns = self.__build_fqdn_arr(self.clt_mechspecs)
        self.srv_fqdns = self.__build_fqdn_arr([self.srv_mechspec])
        self.mst_fqdns = self.__build_fqdn_arr([self.mst_mechspec])
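As a rough illustration of how this configuration object is filled in before an experiment: the module only reads the .fqdn and .netspec attributes of a mechspec, so the sketch below uses a stand-in class and made-up client addresses (the server/master MACs are the ones that appear in the repository's run script). The final assertion also shows how calc_client_qps splits load: with a 100-QPS master, 200100 total QPS across two clients gives 100000 QPS each.

    # Hypothetical sketch; FakeMechSpec and the client addresses are illustrative.
    class FakeMechSpec:
        def __init__(self, fqdn, netspec):
            self.fqdn = fqdn
            self.netspec = netspec

    conf = NetExpConf()
    conf.root_dir = "/numam.d/build"   # assumed build location, see setup() below
    conf.srv_mechspec = FakeMechSpec("skylake2.rcs.uwaterloo.ca", "3c:15:fb:c9:f3:36")
    conf.mst_mechspec = FakeMechSpec("skylake3.rcs.uwaterloo.ca", "3c:15:fb:c9:f3:4b")
    conf.clt_mechspecs = [FakeMechSpec("client1.example.org", "aa:bb:cc:dd:ee:01"),
                          FakeMechSpec("client2.example.org", "aa:bb:cc:dd:ee:02")]
    conf.clt_qps = 200100
    conf.mst_qps = 100
    conf.finalize_mechspecs()

    assert conf.calc_client_qps() == 100000   # (200100 - 100) / 2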
__SAMPLE_FN = "sample.txt.tmp"
__PMC_FN = "pmc.txt.tmp"


def __keep_result(conf : NetExpConf):
    result = NetExpResult()

    target_scp_fn = tc.get_odir() + "/" + __SAMPLE_FN
    scpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.mst_mechspec.fqdn + ":" + conf.root_dir + "/" + __SAMPLE_FN + " " + target_scp_fn
    tc.log_print(scpcmd)
    sp.check_call(scpcmd, shell=True)

    result.parser = par.khat_parser()
    with open(target_scp_fn, "r") as f:
        result.sample = f.read()
    result.parser.parse(result.sample)

    rmcmd = "rm " + target_scp_fn
    tc.log_print(rmcmd)
    sp.check_call(rmcmd, shell=True)

    if conf.enable_pmc:
        target_pmc_fn = tc.get_odir() + "/" + __PMC_FN

        pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + " " + target_pmc_fn
        tc.log_print(pmcscpcmd)
        sp.check_call(pmcscpcmd, shell=True)

        if conf.pmc_mode == 0:
            pmcproccmd = "sudo pmcstat -R " + conf.root_dir + "/" + __PMC_FN + " -m " + conf.root_dir + "/" + __PMC_FN + ".proc"
            tc.log_print(pmcproccmd)
            tc.remote_exec(conf.srv_fqdns, pmcproccmd)

            pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + ".proc" + " " + target_pmc_fn + ".proc"
            tc.log_print(pmcscpcmd)
            sp.check_call(pmcscpcmd, shell=True)

        if conf.pmc_mode != 0:
            with open(target_pmc_fn, "r") as f:
                result.pmc_parser = par.pmc_parser(f.read())
        else:
            with open(target_pmc_fn, "rb") as f:
                with open(target_pmc_fn + ".proc", "r") as g:
                    result.pmc_parser = [f.read(), g.read()]

            rmcmd = "rm " + target_pmc_fn + ".proc"
            tc.log_print(rmcmd)
            sp.check_call(rmcmd, shell=True)

        rmcmd = "rm " + target_pmc_fn
        tc.log_print(rmcmd)
        sp.check_call(rmcmd, shell=True)

    return result
def stop_all(conf : NetExpConf):
    # stop clients
    tc.log_print("Stopping clients...")
    tc.remote_exec(conf.clt_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False)

    # stop master
    tc.log_print("Stopping master...")
    tc.remote_exec(conf.mst_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False)

    if not conf.enable_client_only:
        # stop server
        tc.log_print("Stopping server...")
        tc.remote_exec(conf.srv_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False)

        if conf.enable_pmc:
            tc.log_print("Stopping server PMC...")
            tc.remote_exec(conf.srv_fqdns, "sudo killall -9 pmcstat", check=False)


def __run_setup_cmd(conf : NetExpConf, cmd : str, desc : str):
    all = []
    all.extend(conf.srv_fqdns)
    all.extend(conf.clt_fqdns)
    all.extend(conf.mst_fqdns)

    ssrv : list[tuple[str, sp.Popen]] = []
    for s in all:
        tc.log_print(f"Running \'{desc}\' on {s}...")
        ssrv.append((s, tc.remote_exec([s], cmd, blocking=False, check=False)[0]))

    for p in ssrv:
        _ , stderr = p[1].communicate()
        if p[1].returncode != 0:
            print(f"{ p[0] } \'{desc}\' failed. stderr:\n{stderr.decode()}\n")
        else:
            print(f"{ p[0] } \'{desc}\' succeeded")
def setup(conf : NetExpConf, bench : False, dpdk : False):
    libtopo_path = "/libtopo"
    dpdk_path = "/dpdk"
    bench_path = "/numam.d"

    if dpdk:
        setup_cmd = f'''sudo rm -rf {libtopo_path}; sudo rm -rf /usr/local/include/libtopo;
                        sudo rm -rf /usr/local/lib/libtopo;
                        sudo mkdir -p {libtopo_path};
                        sudo chmod 777 {libtopo_path};
                        cd {libtopo_path};
                        git clone https://git.quacker.org/d/libtopo;
                        cd libtopo;
                        mkdir build;
                        cd build;
                        cmake ../;
                        sudo make install'''
        __run_setup_cmd(conf, setup_cmd, "dpdk - libtopo")

        setup_cmd = f'''sudo pkg install -y meson pkgconf py39-pyelftools;
                        sudo rm -rf {dpdk_path}
                        sudo mkdir -p {dpdk_path};
                        sudo chmod 777 {dpdk_path};
                        cd {dpdk_path};
                        git clone https://git.quacker.org/d/numam-dpdk;
                        cd numam-dpdk;
                        git checkout migration;
                        CC=gcc CXX=g++ meson -Denable_kmods=true build;
                        cd build;
                        sudo ninja install'''
        __run_setup_cmd(conf, setup_cmd, "dpdk - dpdk")

    if bench:
        setup_cmd = f'''sudo rm -rf {bench_path};
                        sudo mkdir -p {bench_path};
                        sudo chmod 777 {bench_path}'''
        __run_setup_cmd(conf, setup_cmd, "bench - remove")

        all = []
        all.extend(conf.srv_fqdns)
        all.extend(conf.clt_fqdns)
        all.extend(conf.mst_fqdns)
        dir = f"{os.path.dirname(__file__)}/../"
        for clt in all:
            print("Syncing files to " + clt + "...")
            rsync_cmd = f"rsync -az --no-perms --rsync-path=\"sudo rsync\" --omit-dir-times -e \"ssh -p77\" {dir} {tc.get_ssh_user()}@{clt}:{bench_path}/"
            sp.check_call(rsync_cmd, shell=True)

        setup_cmd = f'''cd {bench_path};
                        sudo rm -rf build;
                        mkdir build;
                        cd build;
                        cmake ../;
                        make -j8 khat cat rat memloadgen'''
        __run_setup_cmd(conf, setup_cmd, "bench - compile")
def run(conf : NetExpConf):
    stop_all(conf)
    while True:
        server_cmd = "sudo "
        if conf.enable_pmc:
            if conf.pmc_mode != 0:
                pmc_cmd = "sudo pmcstat -C -w " + str(conf.pmc_counting_interval) + " -s " + conf.get_pmc_str() + " -o " + conf.root_dir + "/" + __PMC_FN
            else:
                pmc_cmd = "sudo pmcstat -n " + str(conf.pmc_sampling_rate) + " -S " + conf.get_pmc_str() + " -O " + conf.root_dir + "/" + __PMC_FN
            tc.log_print("Starting server PMC...")
            tc.log_print(pmc_cmd)
            spmc = tc.remote_exec(conf.srv_fqdns, pmc_cmd, blocking=False)

        server_cmd += conf.root_dir + "/khat --log-level lib.eal:err -- -A " + conf.srv_affinity + \
                      " -H " + conf.srv_mechspec.netspec + " -p " + str(conf.srv_port)
        if int(conf.clt_pkt_pad) > 1518:
            server_cmd += " -J "
        if conf.enable_client_only:
            ssrv = None
            tc.log_print(server_cmd)
        else:
            # start server
            tc.log_print("Starting server...")
            tc.log_print(server_cmd)
            ssrv = tc.remote_exec(conf.srv_fqdns, server_cmd, blocking=False)

        if conf.enable_memgen:
            memgen_cmd = "sudo " + conf.root_dir + "/memloadgen -b " + str(conf.memgen_size) + " -s " + conf.memgen_affinity + \
                         " -i " + str(conf.memgen_iteration) + " -d " + str(conf.memgen_tgtdom)
            tc.log_print("Starting memloadgen...")
            tc.log_print(memgen_cmd)
            smem = tc.remote_exec(conf.srv_fqdns, memgen_cmd, blocking=False)

        # start clients
        tc.log_print("Starting clients...")
        sclt = []
        sclt_name = []
        for i in range(len(conf.clt_fqdns)):
            client_cmd = "sudo " + conf.root_dir + "/rat --log-level lib.eal:err -- -S -A " + conf.clt_affinity + \
                         " -i " + conf.clt_ia + \
                         " -q " + str(conf.calc_client_qps()) + \
                         " -H " + conf.clt_mechspecs[i].netspec + \
                         " -s " + conf.srv_mechspec.netspec + \
                         " -r " + str(conf.clt_rage_quit_lat) + \
                         " -l " + str(conf.clt_pkt_loss_lat) + \
                         " -w " + str(conf.clt_wrkld) + \
                         " -w " + str(conf.clt_wrkarg0) + \
                         " -w " + str(conf.clt_wrkarg1) + \
                         " -P " + str(conf.clt_pkt_pad) + \
                         " -D " + str(conf.clt_pkt_depth) + \
                         " -p " + str(conf.clt_port)
            if int(conf.clt_pkt_pad) > 1518:
                client_cmd += " -J "
            tc.log_print(client_cmd)
            sclt.append(tc.remote_exec([conf.clt_fqdns[i]], client_cmd, blocking=False)[0])
            sclt_name.append(conf.clt_fqdns[i])

        time.sleep(5)
        # start master
        tc.remote_exec
        tc.log_print("Starting master...")
        master_cmd = "sudo " + conf.root_dir + "/cat --log-level lib.eal:err -- " + \
                     " -s " + conf.srv_mechspec.netspec + \
                     " -o " + conf.root_dir + "/" + __SAMPLE_FN + \
                     " -t " + str(conf.mst_duration) + \
                     " -T " + str(conf.mst_warmup) + \
                     " -i " + conf.mst_ia + \
                     " -q " + str(conf.mst_qps) + \
                     " -l " + str(conf.mst_pkt_loss_lat) + \
                     " -L " + str(conf.mst_pkt_loss_max) + \
                     " -A " + conf.mst_affinity + \
                     " -H " + conf.mst_mechspec.netspec + \
                     " -p " + str(conf.mst_port)
        for clt in conf.clt_mechspecs:
            master_cmd += " -S " + clt.netspec
        tc.log_print(master_cmd)
        sp = tc.remote_exec(conf.mst_fqdns, master_cmd, blocking=False)
        p = sp[0]

        # launch stderr monitoring thread
        exclude = ["Pseudo-terminal", "ice_", "i40e_"]
        tc.errthr_create([p], conf.mst_fqdns, exclude)
        if not conf.enable_client_only:
            tc.errthr_create(ssrv, conf.srv_fqdns, exclude)
        tc.errthr_create(sclt, sclt_name, exclude)
        if conf.enable_memgen:
            tc.errthr_create(smem, ["memloadgen"], exclude)
        if conf.enable_pmc:
            tc.errthr_create(spmc, ["pmcstat"], exclude)
        tc.errthr_start()
        success = False
        cur = 0
        # selec = select.poll()
        # selec.register(p.stdout, select.POLLIN)
        while True:
            # either failed or timeout
            # we use failure detection to save time for long durations
            if tc.errthr_get_failed() or cur >= (conf.mst_warmup + conf.mst_duration) * 3:
                break

            # while selec.poll(1):
            #     print(p.stdout.readline())

            if p.poll() != None:
                success = True
                break

            time.sleep(1)
            cur = cur + 1

        stop_all(conf)
        tc.errthr_stop()
        tc.log_print("Cooling down...")
        time.sleep(5)

        if success:
            return __keep_result(conf)
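Taken together, a driver script would call setup() once to provision the machines and then run() per configuration. A minimal sketch, assuming conf was populated and finalized as in the earlier example:

    # Hypothetical driver sketch; conf comes from the NetExpConf example above.
    import libtc as tc

    tc.init("~/results.d/numam/")
    setup(conf, bench=True, dpdk=False)   # sync sources and build khat/cat/rat/memloadgen

    result = run(conf)
    if result is not None:
        # result.parser is a libpar.khat_parser over the collected sample text
        tc.log_print("Collected " + str(len(result.sample.splitlines())) + " sample lines")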
225 scripts/run.py (Executable file)
@ -0,0 +1,225 @@
import subprocess as sp
import time
import select
import os
import datetime
import pwd
import sys
import getopt
import numpy as np
import re

import libpar as par
import libtc as tc

step_inc_pct = 100
init_step = 20000
start_step = 10000
term_qps = 85000000000

term_pct = 1
inc_pct = 50
server_port = 23444

# paths
test_dir = "/numam.d/build"
file_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(file_dir, "..")
sample_filename = "sample.txt"

affinity = [
    "0x4",     # core 2
    "0x1000"   # core 12
]

master = ["skylake3.rcs.uwaterloo.ca"]
master_mac = ["3c:15:fb:c9:f3:4b"]

server = ["skylake2.rcs.uwaterloo.ca"]
server_mac = ["3c:15:fb:c9:f3:36"]

clients = []
client_mac = []

rage_quit = 1000  # 1s
warmup = 5
duration = 10
cooldown = 0
cacheline = 0
SSH_PARAM = "-o StrictHostKeyChecking=no -p77"
SSH_USER = "oscar"

hostfile = None
lockstat = False
client_only = False
def stop_all():
    # stop clients
    tc.log_print("Stopping clients...")
    tc.remote_exec(clients, "sudo killall -9 rat", check=False)

    if not client_only:
        # stop server
        tc.log_print("Stopping server...")
        tc.remote_exec(server, "sudo killall -9 khat", check=False)

    # stop master
    tc.log_print("Stopping master...")
    tc.remote_exec(master, "sudo killall -9 cat", check=False)


def get_client_str(clt):
    ret = " "
    for client in clt:
        ret += " -a " + client + " "
    return ret
def run_exp(sc, ld):
    while True:
        if client_only:
            ssrv = None
        else:
            # start server
            tc.log_print("Starting server...")
            server_cmd = "sudo " + test_dir + "/khat -- -A " + sc
            tc.log_print(server_cmd)

            ssrv = tc.remote_exec(server, server_cmd, blocking=False)

        # start clients
        # tc.log_print("Starting clients...")
        # client_cmd = tc.get_cpuset_core(client_threads) + " " + test_dir + "/pingpong/build/dismember -A"
        # tc.log_print(client_cmd)
        # sclt = tc.remote_exec(ssh_clients, client_cmd, blocking=False)

        time.sleep(3)
        # start master
        tc.log_print("Starting master...")
        master_cmd = "sudo " + test_dir + "/cat -- " + \
                     " -s " + server_mac[0] + \
                     " -o " + test_dir + "/" + sample_filename + \
                     " -t " + str(duration) + \
                     " -T " + str(warmup) + \
                     " -i fixed:0.01" + \
                     " -r " + str(rage_quit) + \
                     " -A 0x4"

        tc.log_print(master_cmd)
        sp = tc.remote_exec(master, master_cmd, blocking=False)
        p = sp[0]

        # launch stderr monitoring thread
        tc.errthr_create(sp, exclude=[".*EAL.*"])
        tc.errthr_create(ssrv, exclude=[".*EAL.*"])
        tc.errthr_start()
        success = False
        cur = 0
        while True:
            # either failed or timeout
            # we use failure detection to save time for long durations
            if tc.errthr_get_failed() or cur >= int(warmup + duration) + 5:
                break

            if p.poll() != None:
                success = True
                break

            time.sleep(1)
            cur = cur + 1

        stop_all()
        tc.errthr_stop()
        print("Cooling down...")
        time.sleep(cooldown)

        if success:
            return
def keep_results():
    scpcmd = "scp -P77 oscar@" + master[0] + ":" + test_dir + "/" + sample_filename + " " + tc.get_odir() + "/sample.txt"
    tc.log_print(scpcmd)
    sp.check_call(scpcmd, shell=True)

    with open(tc.get_odir() + "/sample.txt", 'r') as f:
        tc.log_print("Total requests: " + str(len(f.readlines())))

    return
def main():
    global hostfile
    global server
    global master
    global clients
    global client_only

    tc.set_ssh_param(SSH_PARAM)
    tc.set_ssh_user(SSH_USER)

    options = getopt.getopt(sys.argv[1:], 'h:sldcp')[0]
    for opt, arg in options:
        if opt in ('-h'):
            hostfile = arg
        elif opt in ('-s'):
            stop_all()
            return
        elif opt in ('-c'):
            client_only = True

    tc.init("~/results.d/numam/")

    tc.log_print("Configuration:\n" + \
                 "Hostfile: " + ("None" if hostfile == None else hostfile) + "\n" \
                 "Client only: " + str(client_only) + "\n")

    if hostfile != None:
        hosts = tc.parse_hostfile(hostfile)
        server = tc.process_hostnames(server, hosts)
        clients = tc.process_hostnames(clients, hosts)
        master = tc.process_hostnames(master, hosts)

    stop_all()

    for i in range(0, len(affinity)):
        eaff = affinity[i]
        # step_mul = 100
        # last_load = 0
        # cur_load = start_step

        tc.begin(eaff)

        tc.log_print("============ Affinity: " + str(eaff) + " Load: MAX" + " ============")
        run_exp(eaff, 0)
        keep_results()
        stop_all()

        # while True:
        #     tc.log_print("============ Sched: " + str(ename) + " Flag: " + format(esched, '#04x') + " Load: " + str(cur_load) + " ============")

        #     output, sout, serr = run_exp(esched, cur_load, lockstat)

        #     qps = keep_results(output, sout, serr)

        #     pct = int((qps - last_load) / init_step * 100)
        #     tc.log_print("last_load: " + str(last_load) + " this_load: " + str(qps) + " inc_pct: " + str(pct) + "%")

        #     if cur_load > term_qps:
        #         tc.log_print("qps more than " + str(term_qps) + "%. Done.")
        #         break

        #     if pct <= term_pct:
        #         tc.log_print("inc_pct less than TERM_PCT " + str(term_pct) + "%. Done.")
        #         break

        #     if pct <= inc_pct:
        #         step_mul += step_inc_pct
        #         tc.log_print("inc_pct less than INC_PCT " + str(inc_pct) + "%. Increasing step multiplier to " + str(step_mul) + "%")

        #     last_load = qps
        #     cur_load += int(init_step * step_mul / 100)
        #     tc.log_print("")

        tc.end()

    stop_all()


main()
@ -1,112 +0,0 @@
#!/usr/bin/env python3.6

import numpy as np
import sys
import re
import os
import json
import getopt
import math
import concurrent.futures as CF

columns = [
    ("Req per second", "rps", ".2f"),
    ("Bytes per second", "bps", ".2f"),
    ("Average Latency", "lat_avg", ".2f"),
    ("50th Latency", "lat_50", ".0f"),
    ("95th Latency", "lat_95", ".0f"),
    ("99th Latency", "lat_99", ".0f"),
    ("Latency stddev", "lat_std", ".2f")
]

TIME = 30
REQ_SZ = 4096


class DatObj:
    def __init__(self, raw : list, time : int, req_sz : int):
        self.raw = raw
        self.rps = len(raw) / time
        self.bps = self.rps * req_sz
        self.lat_avg = np.average(self.raw)
        self.lat_99 = np.percentile(self.raw, 99)
        self.lat_95 = np.percentile(self.raw, 95)
        self.lat_50 = np.percentile(self.raw, 50)
        self.lat_std = np.std(self.raw)


def parse_file(lines : list, time : int, req_sz : int) -> DatObj :
    raw = []
    for line in lines:
        if len(line) > 0:
            raw.append(int(line))
    return DatObj(raw, time, req_sz)
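As a quick illustration of what DatObj computes, here is a hedged sketch feeding it a handful of made-up latency samples over a one-second window:

    # Illustrative only: five fake latency samples over a 1-second window.
    samples = [1000, 1200, 1100, 5000, 1300]
    obj = DatObj(samples, time=1, req_sz=4096)

    print(obj.rps)      # 5 requests / 1 s = 5.0
    print(obj.bps)      # 5.0 * 4096 = 20480.0
    print(obj.lat_avg)  # np.average(samples) = 1920.0
    print(obj.lat_50)   # np.percentile(samples, 50) = 1200.0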
def output_col():
    ret = "Benchmark"
    for name, _, _ in columns:
        ret = ret + "," + name + "," + name + " (NUMA)" + "," + "% change"
    return ret


def get_attr_or_none(obj, attr):
    if (obj != None):
        val = getattr(obj, attr)
    else:
        val = None
    return val


def output_objs(name: str, obj : DatObj, obj_numa : DatObj):
    ret = name
    for _, attr, fmt in columns:
        val = get_attr_or_none(obj, attr)
        val_numa = get_attr_or_none(obj_numa, attr)

        ret = ret + "," + (format(val, fmt) if val != None else "N/A")
        ret = ret + "," + (format(val_numa, fmt) if val_numa != None else "N/A")

        if val == None or val_numa == None:
            ret = ret + "," + "N/A"
        else:
            ret = ret + "," + format((val_numa - val) / val * 100, ".2f") + "%"
    return ret
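The "% change" column is simply the relative difference of the NUMA run against the baseline; a worked one-liner with made-up values:

    # Illustrative: baseline 100000 rps vs. 84000 rps for the NUMA variant.
    val, val_numa = 100000.0, 84000.0
    print(format((val_numa - val) / val * 100, ".2f") + "%")   # -16.00%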
def process_file(f : str, obj_map):
    with open(f, "r") as fp:
        lines = fp.readlines()

    bench_name = os.path.basename(f)
    obj_map[bench_name] = parse_file(lines, TIME, REQ_SZ)
    print("Processed file " + f + ". Benchmark name: " + bench_name)


def process_dir(path : str, obj_map):
    files = [os.path.abspath(os.path.join(path, x)) for x in os.listdir(path)]
    for f in files:
        if (".sh" in f):
            continue
        if (os.path.isfile(f)):
            process_file(f, obj_map)


def main():
    datdir = None
    options = getopt.getopt(sys.argv[1:], 'd:')[0]

    for opt, arg in options:
        if opt in ('-d'):
            datdir = arg

    if datdir == None:
        raise Exception("Must specify -d parameter")

    obj_map = dict()
    process_dir(datdir, obj_map)

    with open("results.csv", "w") as f:
        f.write(output_col())
        f.write("\n")

        for bench in obj_map:
            if bench.endswith("_numa"):
                continue
            f.write(output_objs(bench, obj_map[bench], obj_map.get(bench + "_numa")))
            f.write("\n")


if __name__ == "__main__":
    main()
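The CSV pairs each <bench> result file with its <bench>_numa counterpart from the same directory, so both are expected to be present. A small sketch of driving the same functions programmatically (the directory path and benchmark name are placeholders):

    # Illustrative: build one CSV row for a directory holding rand_read and rand_read_numa.
    obj_map = dict()
    process_dir("/path/to/results", obj_map)   # path is a placeholder
    print(output_col())
    print(output_objs("rand_read", obj_map.get("rand_read"), obj_map.get("rand_read_numa")))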
@ -1,19 +0,0 @@
# rand_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read_numa

# rand_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write_numa

# mono_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read_numa

# mono_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write_numa

# mixed
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read
sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read_numa
@ -1,19 +0,0 @@
# rand_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev

# rand_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev

# mono_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev

# mono_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev

# mixed
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev
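In each pair above, the _numa variant differs only in the core and buffer affinity masks: the same bit pattern is shifted up by 24 positions, i.e. onto the next group of 24 cores, presumably the other socket. A small hedged check of that relationship:

    # Hedged check: the "_numa" masks are the baseline masks shifted left by 24 bits.
    assert 0x555555 << 24 == 0x555555000000
    assert 0xAAAAAA << 24 == 0xAAAAAA000000
    print(hex(0x555555 << 24))   # 0x555555000000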
@ -1,19 +0,0 @@
# rand_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev

# rand_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev

# mono_read
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev

# mono_write
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev

# mixed
sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev
sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev
797 storage/birb.cc
@ -1,797 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <x86/_stdint.h>
|
||||
#include <getopt.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#include <threads.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <set>
|
||||
|
||||
#include "rte_lcore.h"
|
||||
#include "spdk/cpuset.h"
|
||||
#include "spdk/stdinc.h"
|
||||
#include "spdk/thread.h"
|
||||
#include "spdk/env.h"
|
||||
#include "spdk/event.h"
|
||||
#include "spdk/log.h"
|
||||
#include "spdk/string.h"
|
||||
|
||||
#include "gen.hh"
|
||||
#include "ntr.h"
|
||||
#include "defs.hh"
|
||||
#include "nm.hh"
|
||||
#include "storage/io_gen.hh"
|
||||
#include "storage/drivers/driver.hh"
|
||||
#include "storage/drivers/bdev.hh"
|
||||
#include "storage/drivers/nvme.hh"
|
||||
|
||||
static inline uint64_t get_cur_ts_nano()
|
||||
{
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>
|
||||
(std::chrono::high_resolution_clock::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
/*
|
||||
* We'll use this struct to gather housekeeping hello_context to pass between
|
||||
* our events and callbacks.
|
||||
*/
|
||||
static constexpr unsigned long MAX_SPEC_LEN = 32;
|
||||
static constexpr unsigned long MAX_DEV_NAME_LEN = 32;
|
||||
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;
|
||||
struct options_t {
|
||||
// args
|
||||
int verbosity = NTR_LEVEL_DEFAULT;
|
||||
int num_threads = 1;
|
||||
unsigned long cpumask = 1;
|
||||
char pattern_spec[MAX_SPEC_LEN] = "R,100";
|
||||
char ia_spec[MAX_SPEC_LEN] = "fixed";
|
||||
|
||||
unsigned int time = 5;
|
||||
unsigned int warmup = 2;
|
||||
unsigned int queue_depth = 1;
|
||||
char dev_name[MAX_DEV_NAME_LEN] = "Malloc0";
|
||||
char driver_name[MAX_DEV_NAME_LEN] = "bdev";
|
||||
unsigned int read_pct = 0;
|
||||
io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
||||
|
||||
char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt";
|
||||
|
||||
unsigned long req_size = 4096;
|
||||
unsigned long rps = 0;
|
||||
};
|
||||
|
||||
struct main_thread_cb_vars {
|
||||
uint32_t worker_thread_init_cnt;
|
||||
uint32_t worker_thread_stop_cnt;
|
||||
};
|
||||
|
||||
struct worker_thread_cb_vars {
|
||||
uint32_t worker_start;
|
||||
uint32_t worker_stop;
|
||||
struct thread_context * ctx;
|
||||
std::list<struct io_request *> * free_ios;
|
||||
};
|
||||
|
||||
static __thread void * cb_vars;
|
||||
static struct options_t options;
|
||||
|
||||
struct io_record {
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
};
|
||||
|
||||
struct io_request {
|
||||
uint64_t start_ts;
|
||||
io_generator_opcode op;
|
||||
char * user_buf;
|
||||
char * dma_buf;
|
||||
};
|
||||
|
||||
struct thread_context {
|
||||
unsigned int tid;
|
||||
unsigned int coreid;
|
||||
unsigned int sockid;
|
||||
pthread_t sys_thread;
|
||||
struct spdk_thread * main_thread;
|
||||
birb_driver * driver;
|
||||
|
||||
unsigned long start_region_offset;
|
||||
unsigned long start_region_length;
|
||||
|
||||
/* modified by worker threads */
|
||||
struct spdk_thread * sp_thread;
|
||||
std::list<io_record *> *io_records;
|
||||
uint64_t overhead_avg;
|
||||
uint32_t overhead_cnt;
|
||||
uint64_t overhead_max;
|
||||
uint64_t overhead_min;
|
||||
};
|
||||
|
||||
static void dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
|
||||
" dev name: %s\n"
|
||||
" driver name: %s\n"
|
||||
" worker threads: 0x%lx\n"
|
||||
" number of threads: %d\n"
|
||||
" IO request size: %lu\n"
|
||||
" IO requests per second: %lu\n"
|
||||
" IO pattern: %s\n"
|
||||
" IO queue depth: %d\n"
|
||||
" IO addressing mode: %d\n"
|
||||
" read percent: %u\n"
|
||||
" inter-arrival dist: %s\n"
|
||||
" run time: %d\n"
|
||||
" warmup time: %d\n"
|
||||
" output file: %s\n",
|
||||
options.dev_name,
|
||||
options.driver_name,
|
||||
options.cpumask,
|
||||
options.num_threads,
|
||||
options.req_size,
|
||||
options.rps,
|
||||
options.pattern_spec,
|
||||
options.queue_depth,
|
||||
options.addr_mode,
|
||||
options.read_pct,
|
||||
options.ia_spec,
|
||||
options.time,
|
||||
options.warmup,
|
||||
options.output_file
|
||||
);
|
||||
}
|
||||
|
||||
static void usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
" -V(VV): verbose mode\n"
|
||||
" -D: dev name\n"
|
||||
" -k: driver to use (default bdev)\n"
|
||||
" -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
|
||||
" -b: IO request size\n"
|
||||
" -q: IO requests per second\n"
|
||||
" -P: IO request pattern\n"
|
||||
" -Q: IO request queue depth\n"
|
||||
" -I: inter-arrival time distribution\n"
|
||||
" -t: total run time\n"
|
||||
" -w: warm up time\n"
|
||||
" -o: latency response output file\n");
|
||||
}
|
||||
|
||||
static int parse_arg(int c, char *arg)
|
||||
{
|
||||
switch (c) {
|
||||
case 'V':
|
||||
ntr_set_level(NTR_DEP_USER1,
|
||||
ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'D':
|
||||
strncpy(options.dev_name, arg, MAX_DEV_NAME_LEN);
|
||||
break;
|
||||
case 'k':
|
||||
strncpy(options.driver_name, arg, MAX_DEV_NAME_LEN);
|
||||
break;
|
||||
case 'a':
|
||||
options.cpumask = strtoull(optarg, nullptr, 16);
|
||||
options.num_threads = cmask_get_num_cpus(
|
||||
options.cpumask);
|
||||
|
||||
if (options.num_threads == 0) {
|
||||
fprintf(stderr,
|
||||
"must run at least one thread\n");
|
||||
return EINVAL;
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
options.req_size = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'q':
|
||||
options.rps = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'Q':
|
||||
options.queue_depth = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'P':
|
||||
strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN);
|
||||
break;
|
||||
case 'I':
|
||||
strncpy(options.ia_spec, optarg, MAX_SPEC_LEN);
|
||||
break;
|
||||
case 't':
|
||||
options.time = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'w':
|
||||
options.warmup = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'o':
|
||||
strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN);
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static birb_driver *
|
||||
birb_create_driver(const char * driver_name, void * context)
|
||||
{
|
||||
if (strcmp(driver_name, "bdev") == 0) {
|
||||
return new birb_bdev_driver(reinterpret_cast<const char *>(context));
|
||||
} else if (strcmp(driver_name, "nvme") == 0) {
|
||||
return new birb_nvme_driver(reinterpret_cast<const char *>(context));
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
static birb_driver_thread_context *
|
||||
birb_create_thread_context(birb_driver * driver)
|
||||
{
|
||||
if (driver->get_type() == birb_driver::BIRB_DRV_BDEV) {
|
||||
return new birb_bdev_thread_context(dynamic_cast<birb_bdev_driver *>(driver));
|
||||
} else if (driver->get_type() == birb_driver::BIRB_DRV_NVME) {
|
||||
return new birb_nvme_thread_context(dynamic_cast<birb_nvme_driver *>(driver));
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
birb_destroy_driver(birb_driver * drv)
|
||||
{
|
||||
delete drv;
|
||||
}
|
||||
|
||||
static void
|
||||
birb_destroy_thread_context(birb_driver_thread_context * ctx)
|
||||
{
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function for io completion.
|
||||
*/
|
||||
static void
|
||||
worker_io_complete(bool success, void *cb_arg)
|
||||
{
|
||||
auto vars = (struct worker_thread_cb_vars *)cb_vars;
|
||||
auto req = (struct io_request *)cb_arg;
|
||||
|
||||
uint64_t end_ts = get_cur_ts_nano();
|
||||
|
||||
if (!success) {
|
||||
// XXX: print warning for errors for now
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d <worker_io_complete>: io request failed\n", vars->ctx->tid);
|
||||
} else {
|
||||
auto rec = new struct io_record;
|
||||
rec->start_ts = req->start_ts;
|
||||
rec->end_ts = end_ts;
|
||||
vars->ctx->io_records->push_back(rec);
|
||||
|
||||
if (req->op == IOGEN_READ) {
|
||||
memcpy(req->user_buf, req->dma_buf, options.req_size);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_io_complete>: completed io request type %d\n", vars->ctx->tid, req->op);
|
||||
}
|
||||
|
||||
vars->free_ios->push_back(req);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cb_notify_main_init(void * arg)
|
||||
{
|
||||
auto * ctx = (struct thread_context *)arg;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_init: from thread %d to main.\n", ctx->tid);
|
||||
|
||||
auto * vars = (struct main_thread_cb_vars *) cb_vars;
|
||||
vars->worker_thread_init_cnt++;
|
||||
}
|
||||
|
||||
static void
|
||||
cb_notify_main_stop(void * arg)
|
||||
{
|
||||
auto * ctx = (struct thread_context *)arg;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_stop: from thread %d to main.\n", ctx->tid);
|
||||
|
||||
auto * vars = (struct main_thread_cb_vars *) cb_vars;
|
||||
vars->worker_thread_stop_cnt++;
|
||||
}
|
||||
|
||||
static void
|
||||
cb_notify_worker_start(void * arg)
|
||||
{
|
||||
auto * ctx = (struct thread_context *)arg;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_start: from main to thread %d.\n", ctx->tid);
|
||||
|
||||
auto * vars = (struct worker_thread_cb_vars *) cb_vars;
|
||||
vars->worker_start = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
cb_notify_worker_stop(void * arg)
|
||||
{
|
||||
auto * ctx = (struct thread_context *)arg;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_stop: from main to thread %d.\n", ctx->tid);
|
||||
|
||||
auto * vars = (struct worker_thread_cb_vars *) cb_vars;
|
||||
vars->worker_stop = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
main_thread_cb_vars_init(struct main_thread_cb_vars * vars)
|
||||
{
|
||||
vars->worker_thread_init_cnt = 0;
|
||||
vars->worker_thread_stop_cnt = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
worker_thread_cb_vars_init(struct worker_thread_cb_vars * vars, struct thread_context * ctx,
|
||||
std::list<struct io_request *> * free_ios)
|
||||
{
|
||||
vars->worker_start = 0;
|
||||
vars->worker_stop = 0;
|
||||
vars->ctx = ctx;
|
||||
vars->free_ios = free_ios;
|
||||
}
|
||||
|
||||
static void *
|
||||
worker_thread_main(void * arg)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
constexpr static unsigned int SPDK_THREAD_NAME_SZ = 16;
|
||||
|
||||
struct worker_thread_cb_vars vars;
|
||||
auto *ctx = (struct thread_context *)arg;
|
||||
birb_driver_thread_context * driver_thread_ctx;
|
||||
std::list<struct io_request *> free_ios;
|
||||
char spdk_thread_name[SPDK_THREAD_NAME_SZ];
|
||||
struct spdk_cpuset * cpuset;
|
||||
|
||||
Generator * ia_gen = nullptr;
|
||||
io_generator * io_gen = nullptr;
|
||||
|
||||
struct io_generator_ctx io_ctx;
|
||||
uint64_t next_ts;
|
||||
uint64_t a_offset;
|
||||
uint64_t last_loop_ts = 0;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);
|
||||
|
||||
ctx->overhead_avg = 0;
|
||||
ctx->overhead_cnt = 0;
|
||||
ctx->overhead_max = 0;
|
||||
ctx->overhead_min = UINT64_MAX;
|
||||
|
||||
// create spdk thread
|
||||
cpuset = spdk_cpuset_alloc();
|
||||
if (cpuset == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc cpuset\n");
|
||||
rc = ENOMEM;
|
||||
goto cleanup;
|
||||
}
|
||||
spdk_cpuset_zero(cpuset);
|
||||
spdk_cpuset_set_cpu(cpuset, ctx->coreid, true);
|
||||
snprintf(spdk_thread_name, SPDK_THREAD_NAME_SZ, "birb_worker_%u", ctx->tid);
|
||||
ctx->sp_thread = spdk_thread_create(spdk_thread_name, cpuset);
|
||||
if (ctx->sp_thread == nullptr) {
|
||||
rc = ENOMEM;
|
||||
goto cleanup;
|
||||
}
|
||||
spdk_set_thread(ctx->sp_thread);
|
||||
|
||||
// create thread context
|
||||
driver_thread_ctx = birb_create_thread_context(ctx->driver);
|
||||
if (driver_thread_ctx == nullptr || driver_thread_ctx->get_status() != birb_driver::BIRB_SUCCESS) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not create thread context!\n", ctx->tid);
|
||||
rc = EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// create io request objects
|
||||
for (unsigned int i = 0; i < options.queue_depth; i++) {
|
||||
auto dma_buf = (char *)spdk_dma_zmalloc_socket(options.req_size, ctx->driver->get_align(), NULL, ctx->sockid);
|
||||
auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
|
||||
|
||||
if (dma_buf == nullptr || user_buf == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
|
||||
rc = ENOMEM;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
auto io_req = new struct io_request;
|
||||
io_req->dma_buf = dma_buf;
|
||||
io_req->user_buf = user_buf;
|
||||
|
||||
free_ios.push_back(io_req);
|
||||
}
|
||||
|
||||
// init thread local states
|
||||
worker_thread_cb_vars_init(&vars, ctx, &free_ios);
|
||||
cb_vars = &vars;
|
||||
|
||||
ia_gen = createGenerator(options.ia_spec);
|
||||
if (ia_gen == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
||||
rc = EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));
|
||||
|
||||
io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode);
|
||||
if (io_gen == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
||||
rc = EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
|
||||
|
||||
if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_init, ctx)) != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
|
||||
|
||||
while (vars.worker_start != 1) {
|
||||
spdk_thread_poll(spdk_get_thread(), 0, 0);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);
|
||||
|
||||
/* random delay 0-100 us */
|
||||
usleep(nm_get_uptime_ns() % 100);
|
||||
|
||||
next_ts = get_cur_ts_nano();
|
||||
|
||||
while (true) {
|
||||
uint64_t cur_loop_ts = get_cur_ts_nano();
|
||||
if (last_loop_ts > 0) {
|
||||
uint64_t overhead = cur_loop_ts - last_loop_ts;
|
||||
if (ctx->overhead_max < overhead) {
|
||||
ctx->overhead_max = overhead;
|
||||
}
|
||||
|
||||
if (ctx->overhead_min > overhead) {
|
||||
ctx->overhead_min = overhead;
|
||||
}
|
||||
|
||||
ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead;
|
||||
ctx->overhead_cnt++;
|
||||
ctx->overhead_avg /= ctx->overhead_cnt;
|
||||
}
|
||||
last_loop_ts = cur_loop_ts;
|
||||
|
||||
spdk_thread_poll(spdk_get_thread(), 0, 0);
|
||||
driver_thread_ctx->poll();
|
||||
|
||||
if (vars.worker_stop != 0) {
|
||||
if (free_ios.size() >= options.queue_depth) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!free_ios.empty()) {
|
||||
auto io_req = free_ios.front();
|
||||
|
||||
uint64_t cur_ts = get_cur_ts_nano();
|
||||
|
||||
if (cur_ts >= next_ts) {
|
||||
io_gen->issue(&io_ctx, io_req->dma_buf);
|
||||
|
||||
a_offset = io_ctx.offset + ctx->start_region_offset;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
|
||||
|
||||
io_req->start_ts = cur_ts;
|
||||
io_req->op = io_ctx.op;
|
||||
|
||||
if(io_ctx.op == IOGEN_READ) {
|
||||
rc = driver_thread_ctx->read(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req);
|
||||
} else {
|
||||
rc = driver_thread_ctx->write(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req);
|
||||
}
|
||||
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...", ctx->tid, rc);
|
||||
} else {
|
||||
free_ios.pop_front();
|
||||
next_ts = next_ts + ia_gen->generate() * S2NS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
while (!free_ios.empty()) {
|
||||
auto req = free_ios.front();
|
||||
free_ios.pop_front();
|
||||
spdk_dma_free(req->dma_buf);
|
||||
nm_free(ctx->sockid, req->user_buf);
|
||||
}
|
||||
|
||||
if (ia_gen != nullptr) {
|
||||
delete ia_gen;
|
||||
}
|
||||
|
||||
if (io_gen != nullptr) {
|
||||
delete io_gen;
|
||||
}
|
||||
|
||||
if (cpuset != nullptr) {
|
||||
spdk_cpuset_free(cpuset);
|
||||
}
|
||||
|
||||
if (driver_thread_ctx != nullptr) {
|
||||
birb_destroy_thread_context(driver_thread_ctx);
|
||||
}
|
||||
|
||||
if (rc == 0) {
|
||||
if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_stop, ctx)) != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
|
||||
}
|
||||
}
|
||||
|
||||
spdk_thread_exit(ctx->sp_thread);
|
||||
|
||||
while (!spdk_thread_is_exited(ctx->sp_thread)) {
|
||||
spdk_thread_poll(ctx->sp_thread, 0, 0);
|
||||
};
|
||||
|
||||
if (ctx->sp_thread != nullptr) {
|
||||
spdk_set_thread(nullptr);
|
||||
spdk_thread_destroy(ctx->sp_thread);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
|
||||
|
||||
if (rc != 0) {
|
||||
spdk_app_stop(rc);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode)
|
||||
{
|
||||
char * token = strtok(pattern, ",");
|
||||
|
||||
if (strcmp(token, "M") == 0) {
|
||||
*addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
|
||||
} else {
|
||||
*addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
||||
}
|
||||
|
||||
token = strtok(nullptr, ",");
|
||||
*read_pct = strtoull(token, nullptr, 10);
|
||||
}
|
||||
|
||||
static void
|
||||
birb_main(void * arg1 UNUSED)
|
||||
{
|
||||
int rc = 0;
|
||||
std::list<struct thread_context *> worker_threads;
|
||||
std::ofstream output_file;
|
||||
struct main_thread_cb_vars vars;
|
||||
birb_driver * drv = nullptr;
|
||||
|
||||
unsigned long record_cutoff_time = 0;
|
||||
unsigned long current_s = 0;
|
||||
unsigned int total_reqs = 0;
|
||||
unsigned int tid = 0;
|
||||
unsigned long per_thread_cap = 0;
|
||||
int cur_core;
|
||||
|
||||
/* initialize driver */
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name);
|
||||
drv = birb_create_driver(options.driver_name, options.dev_name);
|
||||
if (drv == nullptr || drv->get_status() != birb_driver::BIRB_SUCCESS) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create device driver.\n");
|
||||
rc = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
per_thread_cap = drv->get_capacity() / options.num_threads;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB\n", drv->get_capacity(), drv->get_capacity() / 1024 / 1024);
|
||||
|
||||
/* misc init */
|
||||
main_thread_cb_vars_init(&vars);
|
||||
cb_vars = &vars;
|
||||
|
||||
parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode);
|
||||
dump_options();
|
||||
|
||||
output_file.open(options.output_file, std::ofstream::out);
|
||||
if (!output_file) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
|
||||
rc = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
cur_core = cmask_get_next_cpu(&options.cpumask);
|
||||
while(cur_core != NEXT_CPU_NULL) {
|
||||
auto * ctx = new struct thread_context;
|
||||
memset(ctx, 0, sizeof(struct thread_context));
|
||||
|
||||
if (ctx == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
|
||||
spdk_app_stop(ENOMEM);
|
||||
return;
|
||||
}
|
||||
|
||||
ctx->tid = tid++;
|
||||
ctx->driver = drv;
|
||||
ctx->main_thread = spdk_get_thread();
|
||||
ctx->sockid = rte_lcore_to_socket_id(cur_core);
|
||||
ctx->coreid = cur_core;
|
||||
ctx->io_records = new std::list<struct io_record *>();
|
||||
ctx->start_region_length = per_thread_cap;
|
||||
ctx->start_region_offset = per_thread_cap * ctx->tid;
|
||||
|
||||
// create sys thread
|
||||
pthread_attr_t attr;
|
||||
cpuset_t scpuset;
|
||||
CPU_ZERO(&scpuset);
|
||||
CPU_SET(cur_core, &scpuset);
|
||||
pthread_attr_init(&attr);
|
||||
pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
|
||||
rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx);
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
|
||||
rc = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
worker_threads.push_back(ctx);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
|
||||
ctx->start_region_offset,
|
||||
ctx->start_region_length);
|
||||
|
||||
cur_core = cmask_get_next_cpu(&options.cpumask);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n");
|
||||
while(vars.worker_thread_init_cnt < (uint32_t)options.num_threads) {
|
||||
spdk_thread_poll(spdk_get_thread(), 0, 0);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_start, tctx);
|
||||
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
/* main event loop */
|
||||
while(current_s < options.time) {
|
||||
if (current_s >= options.warmup && record_cutoff_time == 0) {
|
||||
record_cutoff_time = get_cur_ts_nano();
|
||||
}
|
||||
usleep(1 * S2US);
|
||||
current_s++;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_stop, tctx);
|
||||
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
while(vars.worker_thread_stop_cnt < (uint32_t)options.num_threads) {
|
||||
spdk_thread_poll(spdk_get_thread(), 0, 0);
|
||||
}
|
||||
|
||||
// keep stats
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
uint64_t last_ts = 0;
|
||||
uint64_t processed = 0;
|
||||
for (struct io_record * r : *tctx->io_records) {
|
||||
if (r->start_ts >= record_cutoff_time) {
|
||||
if (r->end_ts > last_ts) {
|
||||
last_ts = r->end_ts;
|
||||
}
|
||||
|
||||
processed++;
|
||||
output_file << r->end_ts - r->start_ts << std::endl;
|
||||
total_reqs++;
|
||||
}
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n",
|
||||
tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n",
|
||||
total_reqs, total_reqs * options.req_size / (options.time - options.warmup));
|
||||
|
||||
end:
|
||||
if (drv != nullptr) {
|
||||
birb_destroy_driver(drv);
|
||||
}
|
||||
|
||||
output_file.close();
|
||||
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
for (struct io_record * r : *tctx->io_records) {
|
||||
delete r;
|
||||
}
|
||||
delete tctx->io_records;
|
||||
delete tctx;
|
||||
}
|
||||
|
||||
exit(0);
|
||||
spdk_app_stop(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
struct spdk_app_opts opts = {};
|
||||
int rc = 0;
|
||||
|
||||
ntr_init();
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);
|
||||
|
||||
/* Set default values in opts structure. */
|
||||
spdk_app_opts_init(&opts, sizeof(opts));
|
||||
opts.name = "birb";
|
||||
|
||||
/*
|
||||
* Parse built-in SPDK command line parameters as well
|
||||
* as our custom one(s).
|
||||
*/
|
||||
if ((rc = spdk_app_parse_args(argc, argv, &opts, "VD:k:a:b:q:Q:P:I:t:w:o:", NULL, parse_arg,
|
||||
usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) {
|
||||
exit(rc);
|
||||
}
|
||||
|
||||
nm_init(options.verbosity);
|
||||
|
||||
/*
|
||||
* spdk_app_start() will initialize the SPDK framework, call hello_start(),
|
||||
* and then block until spdk_app_stop() is called (or if an initialization
|
||||
* error occurs, spdk_app_start() will return with rc even without calling
|
||||
* hello_start().
|
||||
*/
|
||||
rc = spdk_app_start(&opts, birb_main, NULL);
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("ERROR starting application\n");
|
||||
}
|
||||
|
||||
/* At this point either spdk_app_stop() was called, or spdk_app_start()
|
||||
* failed because of internal error.
|
||||
*/
|
||||
|
||||
/* Gracefully close out all of the SPDK subsystems. */
|
||||
spdk_app_fini();
|
||||
return rc;
|
||||
}
|
@ -1,585 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/signal.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#include <threads.h>
|
||||
#include <unistd.h>
|
||||
#include <aio.h>
|
||||
#include <getopt.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/disk.h>
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <set>
|
||||
|
||||
#include "gen.hh"
|
||||
#include "ntr.h"
|
||||
#include "defs.hh"
|
||||
#include "nm.hh"
|
||||
#include "storage/io_gen.hh"
|
||||
|
||||
static inline uint64_t get_cur_ts_nano()
|
||||
{
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>
|
||||
(std::chrono::high_resolution_clock::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
/*
|
||||
* We'll use this struct to gather housekeeping hello_context to pass between
|
||||
* our events and callbacks.
|
||||
*/
|
||||
static constexpr unsigned long MAX_SPEC_LEN = 32;
|
||||
static constexpr unsigned long MAX_DEV_NAME_LEN = 32;
|
||||
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;
|
||||
struct options_t {
|
||||
// args
|
||||
int verbosity = NTR_LEVEL_DEFAULT;
|
||||
int num_threads = 1;
|
||||
unsigned long cpumask = 1;
|
||||
char pattern_spec[MAX_SPEC_LEN] = "R,100";
|
||||
char ia_spec[MAX_SPEC_LEN] = "fixed";
|
||||
|
||||
unsigned int time = 5;
|
||||
unsigned int warmup = 2;
|
||||
unsigned int queue_depth = 1;
|
||||
char dev_name[MAX_DEV_NAME_LEN] = "Malloc0";
|
||||
char driver_name[MAX_DEV_NAME_LEN] = "bdev";
|
||||
unsigned int read_pct = 0;
|
||||
io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
||||
|
||||
char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt";
|
||||
|
||||
unsigned long req_size = 4096;
|
||||
unsigned long rps = 0;
|
||||
};
|
||||
|
||||
|
||||
std::atomic<int> worker_thread_init_cnt(0);
|
||||
std::atomic<int> worker_thread_stop_cnt(0);
|
||||
std::atomic<int> worker_start(0);
|
||||
std::atomic<int> worker_stop(0);
|
||||
static struct options_t options;
|
||||
|
||||
struct io_record {
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
};
|
||||
|
||||
struct io_request {
|
||||
uint64_t start_ts;
|
||||
io_generator_opcode op;
|
||||
char * user_buf;
|
||||
char * dma_buf;
|
||||
struct aiocb aio;
|
||||
};
|
||||
|
||||
struct thread_context {
|
||||
unsigned int tid;
|
||||
unsigned int coreid;
|
||||
unsigned int sockid;
|
||||
pthread_t sys_thread;
|
||||
int disk_fd;
|
||||
|
||||
unsigned long start_region_offset;
|
||||
unsigned long start_region_length;
|
||||
|
||||
/* modified by worker threads */
|
||||
std::list<io_record *> *io_records;
|
||||
uint64_t overhead_avg;
|
||||
uint32_t overhead_cnt;
|
||||
uint64_t overhead_max;
|
||||
uint64_t overhead_min;
|
||||
};
|
||||
|
||||
static void dump_options()
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
|
||||
" dev name: %s\n"
|
||||
" driver name: %s\n"
|
||||
" worker threads: 0x%lx\n"
|
||||
" number of threads: %d\n"
|
||||
" IO request size: %lu\n"
|
||||
" IO requests per second: %lu\n"
|
||||
" IO pattern: %s\n"
|
||||
" IO queue depth: %d\n"
|
||||
" IO addressing mode: %d\n"
|
||||
" read percent: %u\n"
|
||||
" inter-arrival dist: %s\n"
|
||||
" run time: %d\n"
|
||||
" warmup time: %d\n"
|
||||
" output file: %s\n",
|
||||
options.dev_name,
|
||||
options.driver_name,
|
||||
options.cpumask,
|
||||
options.num_threads,
|
||||
options.req_size,
|
||||
options.rps,
|
||||
options.pattern_spec,
|
||||
options.queue_depth,
|
||||
options.addr_mode,
|
||||
options.read_pct,
|
||||
options.ia_spec,
|
||||
options.time,
|
||||
options.warmup,
|
||||
options.output_file
|
||||
);
|
||||
}
|
||||
|
||||
static void usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
" -V(VV): verbose mode\n"
|
||||
" -D: dev name\n"
|
||||
" -k: driver to use (default bdev)\n"
|
||||
" -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
|
||||
" -b: IO request size\n"
|
||||
" -q: IO requests per second\n"
|
||||
" -P: IO request pattern\n"
|
||||
" -Q: IO request queue depth\n"
|
||||
" -I: inter-arrival time distribution\n"
|
||||
" -t: total run time\n"
|
||||
" -w: warm up time\n"
|
||||
" -o: latency response output file\n");
|
||||
}
|
||||
|
||||
static void *
|
||||
worker_thread_main(void * arg)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
auto *ctx = (struct thread_context *)arg;
|
||||
std::list<struct io_request *> free_ios;
|
||||
std::list<struct io_request *> prog_ios;
|
||||
|
||||
Generator * ia_gen = nullptr;
|
||||
io_generator * io_gen = nullptr;
|
||||
|
||||
struct io_generator_ctx io_ctx;
|
||||
uint64_t next_ts;
|
||||
uint64_t a_offset;
|
||||
uint64_t last_loop_ts = 0;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);
|
||||
|
||||
// create io request objects
|
||||
for (unsigned int i = 0; i < options.queue_depth; i++) {
|
||||
auto buf = (char *)nm_malloc(ctx->sockid, options.req_size);
|
||||
auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
|
||||
|
||||
if (buf == nullptr || user_buf == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
|
||||
rc = ENOMEM;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
auto io_req = new struct io_request;
|
||||
io_req->dma_buf = buf;
|
||||
io_req->user_buf = user_buf;
|
||||
io_req->aio.aio_fildes = ctx->disk_fd;
|
||||
io_req->aio.aio_nbytes = options.req_size;
|
||||
io_req->aio.aio_buf = buf;
|
||||
io_req->aio.aio_sigevent.sigev_notify = SIGEV_NONE;
|
||||
io_req->aio.aio_reqprio = 0;
|
||||
|
||||
free_ios.push_back(io_req);
|
||||
}
|
||||
|
||||
// init thread local states
|
||||
ia_gen = createGenerator(options.ia_spec);
|
||||
if (ia_gen == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
||||
rc = EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));
|
||||
|
||||
io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode);
|
||||
if (io_gen == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
||||
rc = EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
|
||||
|
||||
worker_thread_init_cnt.fetch_add(1);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
|
||||
|
||||
while (worker_start.load() == 0) {}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);
|
||||
|
||||
/* random delay 0-100 us */
|
||||
usleep(nm_get_uptime_ns() % 100);
|
||||
|
||||
next_ts = get_cur_ts_nano();
|
||||
|
||||
while (true) {
|
||||
uint64_t cur_ts = get_cur_ts_nano();
|
||||
if (last_loop_ts > 0) {
|
||||
uint64_t overhead = cur_ts - last_loop_ts;
|
||||
if (ctx->overhead_max < overhead) {
|
||||
ctx->overhead_max = overhead;
|
||||
}
|
||||
|
||||
if (ctx->overhead_min > overhead) {
|
||||
ctx->overhead_min = overhead;
|
||||
}
|
||||
|
||||
ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead;
|
||||
ctx->overhead_cnt++;
|
||||
ctx->overhead_avg /= ctx->overhead_cnt;
|
||||
}
|
||||
last_loop_ts = cur_ts;
|
||||
|
||||
// process io completion
|
||||
auto itr = prog_ios.begin();
|
||||
while (itr != prog_ios.end()) {
|
||||
int err;
|
||||
struct io_request * ioreq = *itr;
|
||||
if ((err = aio_error(&ioreq->aio)) != EINPROGRESS) {
|
||||
if (err == 0) {
|
||||
auto rec = new struct io_record;
|
||||
rec->start_ts = ioreq->start_ts;
|
||||
rec->end_ts = cur_ts;
|
||||
|
||||
ctx->io_records->push_back(rec);
|
||||
if (ioreq->op == IOGEN_READ) {
|
||||
memcpy(ioreq->user_buf, ioreq->dma_buf, options.req_size);
|
||||
}
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_io_complete>: completed io request type %d\n", ctx->tid, ioreq->op);
|
||||
|
||||
} else {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: aio failed with %d...\n", ctx->tid, err);
|
||||
}
|
||||
|
||||
if (aio_return(&ioreq->aio) == -1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: aio_return failed with %d...\n", ctx->tid, errno);
|
||||
exit(errno);
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
itr = prog_ios.erase(itr);
|
||||
free_ios.push_back(ioreq);
|
||||
} else {
|
||||
++itr;
|
||||
}
|
||||
}
|
||||
|
||||
if (worker_stop.load() == 1) {
|
||||
if (free_ios.size() >= options.queue_depth) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!free_ios.empty()) {
|
||||
auto io_req = free_ios.front();
|
||||
|
||||
cur_ts = get_cur_ts_nano();
|
||||
|
||||
if (cur_ts >= next_ts) {
|
||||
io_gen->issue(&io_ctx, io_req->dma_buf);
|
||||
|
||||
a_offset = io_ctx.offset + ctx->start_region_offset;
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
|
||||
|
||||
io_req->start_ts = cur_ts;
|
||||
io_req->op = io_ctx.op;
|
||||
io_req->aio.aio_offset = a_offset;
|
||||
|
||||
if(io_ctx.op == IOGEN_READ) {
|
||||
rc = aio_read(&io_req->aio);
|
||||
} else {
|
||||
rc = aio_write(&io_req->aio);
|
||||
}
|
||||
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...\n", ctx->tid, errno);
|
||||
} else {
|
||||
free_ios.pop_front();
|
||||
prog_ios.push_back(io_req);
|
||||
next_ts = next_ts + ia_gen->generate() * S2NS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
while (!free_ios.empty()) {
|
||||
auto req = free_ios.front();
|
||||
free_ios.pop_front();
|
||||
nm_free(ctx->sockid, req->dma_buf);
|
||||
nm_free(ctx->sockid, req->user_buf);
|
||||
}
|
||||
|
||||
if (ia_gen != nullptr) {
|
||||
delete ia_gen;
|
||||
}
|
||||
|
||||
if (io_gen != nullptr) {
|
||||
delete io_gen;
|
||||
}
|
||||
|
||||
worker_thread_stop_cnt.fetch_add(1);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode)
|
||||
{
|
||||
char * token = strtok(pattern, ",");
|
||||
|
||||
if (strcmp(token, "M") == 0) {
|
||||
*addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
|
||||
} else {
|
||||
*addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
||||
}
|
||||
|
||||
token = strtok(nullptr, ",");
|
||||
*read_pct = strtoull(token, nullptr, 10);
|
||||
}
|
||||
|
||||
static void
|
||||
birb_main()
|
||||
{
|
||||
int rc = 0;
|
||||
std::list<struct thread_context *> worker_threads;
|
||||
std::ofstream output_file;
|
||||
|
||||
unsigned long record_cutoff_time = 0;
|
||||
unsigned long current_s = 0;
|
||||
unsigned int total_reqs = 0;
|
||||
unsigned int tid = 0;
|
||||
unsigned long per_thread_cap = 0;
|
||||
int cur_core;
|
||||
int disk_fd;
|
||||
off_t disk_size;
|
||||
u_int disk_sec_size;
|
||||
|
||||
/* initialize driver */
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name);
|
||||
disk_fd = open(options.dev_name, O_RDWR | O_DIRECT);
|
||||
if (disk_fd == -1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open device - %d\n", errno);
|
||||
exit(errno);
|
||||
}
|
||||
|
||||
rc = ioctl(disk_fd, DIOCGMEDIASIZE, &disk_size);
|
||||
if (rc == -1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk size - %d\n", errno);
|
||||
exit(errno);
|
||||
}
|
||||
|
||||
rc = ioctl(disk_fd, DIOCGSECTORSIZE, &disk_sec_size);
|
||||
if (rc == -1) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk sector size - %d\n", errno);
|
||||
exit(errno);
|
||||
}
|
||||
|
||||
per_thread_cap = disk_size / options.num_threads;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB, sector %u bytes\n", disk_size, disk_size / 1024 / 1024, disk_sec_size);
|
||||
|
||||
parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode);
|
||||
dump_options();
|
||||
|
||||
output_file.open(options.output_file, std::ofstream::out);
|
||||
if (!output_file) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
|
||||
rc = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
cur_core = cmask_get_next_cpu(&options.cpumask);
|
||||
while(cur_core != NEXT_CPU_NULL) {
|
||||
auto * ctx = new struct thread_context;
|
||||
memset(ctx, 0, sizeof(struct thread_context));
|
||||
|
||||
if (ctx == NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
|
||||
exit(ENOMEM);
|
||||
}
|
||||
|
||||
ctx->tid = tid++;
|
||||
|
||||
ctx->sockid = nm_get_node_from_core(cur_core);
|
||||
ctx->coreid = cur_core;
|
||||
ctx->io_records = new std::list<struct io_record *>();
|
||||
ctx->start_region_length = per_thread_cap;
|
||||
ctx->start_region_offset = per_thread_cap * ctx->tid;
|
||||
ctx->disk_fd = disk_fd;
|
||||
|
||||
// create sys thread
|
||||
pthread_attr_t attr;
|
||||
cpuset_t scpuset;
|
||||
CPU_ZERO(&scpuset);
|
||||
CPU_SET(cur_core, &scpuset);
|
||||
pthread_attr_init(&attr);
|
||||
pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
|
||||
rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx);
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
|
||||
rc = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
worker_threads.push_back(ctx);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
|
||||
ctx->start_region_offset,
|
||||
ctx->start_region_length);
|
||||
|
||||
cur_core = cmask_get_next_cpu(&options.cpumask);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n");
|
||||
while(worker_thread_init_cnt.load() < options.num_threads) {
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
|
||||
worker_start.store(1);
|
||||
|
||||
/* main event loop */
|
||||
while(current_s < options.time) {
|
||||
if (current_s >= options.warmup && record_cutoff_time == 0) {
|
||||
record_cutoff_time = get_cur_ts_nano();
|
||||
}
|
||||
usleep(1 * S2US);
|
||||
current_s++;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
|
||||
worker_stop.store(1);
|
||||
|
||||
while(worker_thread_stop_cnt.load() < options.num_threads) {
|
||||
}
|
||||
|
||||
// keep stats
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
uint64_t last_ts = 0;
|
||||
uint64_t processed = 0;
|
||||
for (struct io_record * r : *tctx->io_records) {
|
||||
if (r->start_ts >= record_cutoff_time) {
|
||||
if (r->end_ts > last_ts) {
|
||||
last_ts = r->end_ts;
|
||||
}
|
||||
|
||||
processed++;
|
||||
output_file << r->end_ts - r->start_ts << std::endl;
|
||||
total_reqs++;
|
||||
}
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n",
|
||||
tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max);
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n",
|
||||
total_reqs, total_reqs * options.req_size / (options.time - options.warmup));
|
||||
|
||||
end:
|
||||
if (disk_fd != -1) {
|
||||
close(disk_fd);
|
||||
}
|
||||
|
||||
output_file.close();
|
||||
|
||||
for (struct thread_context * tctx : worker_threads) {
|
||||
for (struct io_record * r : *tctx->io_records) {
|
||||
delete r;
|
||||
}
|
||||
delete tctx->io_records;
|
||||
delete tctx;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
ntr_init();
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);
|
||||
|
||||
int c;
|
||||
while (( c = getopt(argc, argv, "VD:k:a:b:q:Q:P:I:t:w:o:")) != -1)
|
||||
{
|
||||
switch (c) {
|
||||
case 'V':
|
||||
ntr_set_level(NTR_DEP_USER1,
|
||||
ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'D':
|
||||
strncpy(options.dev_name, optarg, MAX_DEV_NAME_LEN);
|
||||
break;
|
||||
case 'k':
|
||||
strncpy(options.driver_name, optarg, MAX_DEV_NAME_LEN);
|
||||
break;
|
||||
case 'a':
|
||||
options.cpumask = strtoull(optarg, nullptr, 16);
|
||||
options.num_threads = cmask_get_num_cpus(
|
||||
options.cpumask);
|
||||
|
||||
if (options.num_threads == 0) {
|
||||
fprintf(stderr,
|
||||
"must run at least one thread\n");
|
||||
return EINVAL;
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
options.req_size = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'q':
|
||||
options.rps = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'Q':
|
||||
options.queue_depth = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'P':
|
||||
strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN);
|
||||
break;
|
||||
case 'I':
|
||||
strncpy(options.ia_spec, optarg, MAX_SPEC_LEN);
|
||||
break;
|
||||
case 't':
|
||||
options.time = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'w':
|
||||
options.warmup = strtoull(
|
||||
optarg, nullptr, 10);
|
||||
break;
|
||||
case 'o':
|
||||
strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
exit(0);
|
||||
default:
|
||||
usage();
|
||||
exit(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
nm_init(options.verbosity);
|
||||
birb_main();
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,95 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include "storage/drivers/bdev.hh"
|
||||
#include "ntr.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/thread.h"
|
||||
|
||||
size_t
|
||||
birb_bdev_driver::get_capacity()
|
||||
{
|
||||
return block_num * block_sz;
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_status
|
||||
birb_bdev_driver::get_status()
|
||||
{
|
||||
return this->status;
|
||||
}
|
||||
|
||||
void
|
||||
birb_bdev_driver::bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev * bdev UNUSED,
|
||||
void * event_ctx UNUSED)
|
||||
{
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "bdev_event_cb: unsupported bdev event: type %d\n", type);
|
||||
}
|
||||
|
||||
void
|
||||
birb_bdev_driver::print_all_bdev()
|
||||
{
|
||||
struct spdk_bdev * cur = spdk_bdev_first();
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_bdev_driver: all registered block devices: ");
|
||||
|
||||
while(cur != NULL) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "%s, ", spdk_bdev_get_name(cur));
|
||||
cur = spdk_bdev_next(cur);
|
||||
}
|
||||
}
|
||||
|
||||
birb_bdev_driver::birb_bdev_driver(const char * dev_name) : bdev_desc(nullptr),
|
||||
bdev(nullptr),
|
||||
block_sz(0),
|
||||
block_num(0),
|
||||
status(BIRB_FAIL)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = spdk_bdev_open_ext(dev_name, true, birb_bdev_driver::bdev_event_cb, NULL, &this->bdev_desc);
|
||||
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_bdev_driver: failed to open bdev: %d\n", rc);
|
||||
return;
|
||||
}
|
||||
|
||||
/* A bdev pointer is valid while the bdev is opened. */
|
||||
this->bdev = spdk_bdev_desc_get_bdev(this->bdev_desc);
|
||||
this->block_sz = spdk_bdev_get_block_size(this->bdev);
|
||||
this->block_num = spdk_bdev_get_num_blocks(this->bdev);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_bdev_driver: bdev block size %zu bytes, blocks count %zu\n", this->block_sz, this->block_num);
|
||||
|
||||
this->status = BIRB_SUCCESS;
|
||||
}
|
||||
|
||||
birb_bdev_driver::~birb_bdev_driver()
|
||||
{
|
||||
if (this->status == BIRB_SUCCESS) {
|
||||
spdk_bdev_close(this->bdev_desc);
|
||||
}
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_type
|
||||
birb_bdev_driver::get_type()
|
||||
{
|
||||
return BIRB_DRV_BDEV;
|
||||
}
|
||||
|
||||
size_t
|
||||
birb_bdev_driver::get_align()
|
||||
{
|
||||
return spdk_bdev_get_buf_align(this->bdev);
|
||||
}
|
||||
|
||||
|
||||
struct spdk_bdev *
|
||||
birb_bdev_driver::get_bdev()
|
||||
{
|
||||
return this->bdev;
|
||||
}
|
||||
|
||||
|
||||
struct spdk_bdev_desc *
|
||||
birb_bdev_driver::get_bdev_desc()
|
||||
{
|
||||
return this->bdev_desc;
|
||||
}
|
@ -1,72 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include "storage/drivers/bdev.hh"
|
||||
#include "ntr.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/thread.h"
|
||||
|
||||
birb_bdev_thread_context::birb_bdev_thread_context(birb_bdev_driver * driver) : io_channel(nullptr),
|
||||
status(birb_driver::BIRB_FAIL),
|
||||
driver(driver)
|
||||
{
|
||||
struct spdk_bdev_desc * desc = driver->get_bdev_desc();
|
||||
|
||||
// obtain io channel
|
||||
this->io_channel = spdk_bdev_get_io_channel(desc);
|
||||
if (io_channel == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_bdev_thread_context: could not create bdev I/O channel!\n");
|
||||
}
|
||||
|
||||
|
||||
this->status = birb_driver::BIRB_SUCCESS;
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_status
|
||||
birb_bdev_thread_context::get_status()
|
||||
{
|
||||
return this->status;
|
||||
}
|
||||
|
||||
birb_bdev_thread_context::~birb_bdev_thread_context()
|
||||
{
|
||||
if (this->io_channel != nullptr) {
|
||||
spdk_put_io_channel(this->io_channel);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function for io completion.
|
||||
*/
|
||||
|
||||
void
|
||||
birb_bdev_thread_context::io_callback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
spdk_bdev_free_io(bdev_io);
|
||||
|
||||
auto ctx = reinterpret_cast<struct cb_context *>(cb_arg);
|
||||
ctx->cb(success, ctx->ctx);
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
int
|
||||
birb_bdev_thread_context::read(size_t offset, size_t size, char * buffer, callback callback, void * context)
|
||||
{
|
||||
auto ctx = new struct cb_context;
|
||||
ctx->cb = callback;
|
||||
ctx->ctx = context;
|
||||
return spdk_bdev_read(driver->get_bdev_desc(), this->io_channel, buffer, offset, size, io_callback, reinterpret_cast<void*>(ctx));
|
||||
}
|
||||
|
||||
int
|
||||
birb_bdev_thread_context::write(size_t offset, size_t size, char * buffer, callback callback, void * context)
|
||||
{
|
||||
auto ctx = new struct cb_context;
|
||||
ctx->cb = callback;
|
||||
ctx->ctx = context;
|
||||
return spdk_bdev_write(driver->get_bdev_desc(), this->io_channel, buffer, offset, size, io_callback, reinterpret_cast<void*>(ctx));
|
||||
}
|
||||
|
||||
void
|
||||
birb_bdev_thread_context::poll()
|
||||
{
|
||||
return;
|
||||
}
|
@ -1,135 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include "ntr.h"
|
||||
#include "spdk/nvme.h"
|
||||
#include "spdk/thread.h"
|
||||
#include "storage/drivers/nvme.hh"
|
||||
|
||||
size_t
|
||||
birb_nvme_driver::get_capacity()
|
||||
{
|
||||
return spdk_nvme_ns_get_size(this->ns);
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_status
|
||||
birb_nvme_driver::get_status()
|
||||
{
|
||||
return this->status;
|
||||
}
|
||||
|
||||
void
|
||||
birb_nvme_driver::attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
||||
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts UNUSED)
|
||||
{
|
||||
struct spdk_nvme_ns * ns;
|
||||
auto ctx = reinterpret_cast<struct attach_context *>(cb_ctx);
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: attached to nvme at %s\n", trid->traddr);
|
||||
|
||||
for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
|
||||
nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
|
||||
ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
|
||||
if (ns == nullptr || !spdk_nvme_ns_is_active(ns)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: namespace id: %d size: %zu LBA size: %u\n", spdk_nvme_ns_get_id(ns), spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns));
|
||||
/* XXX: use the first namespace */
|
||||
break;
|
||||
}
|
||||
|
||||
*ctx->ns = ns;
|
||||
*ctx->ctrlr = ctrlr;
|
||||
ctx->valid = 1;
|
||||
}
|
||||
|
||||
bool
|
||||
birb_nvme_driver::probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
||||
struct spdk_nvme_ctrlr_opts *opts UNUSED)
|
||||
{
|
||||
printf("birb_nvme_driver: found nvme at %s\n", trid->traddr);
|
||||
auto ctx = reinterpret_cast<struct attach_context *>(cb_ctx);
|
||||
|
||||
if (strcmp(trid->traddr, ctx->dev_name) == 0) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
birb_nvme_driver::birb_nvme_driver(const char * dev_name) : status(BIRB_FAIL),
|
||||
ctrlr(nullptr),
|
||||
ns(nullptr),
|
||||
opts()
|
||||
{
|
||||
int rc;
|
||||
struct spdk_nvme_transport_id trid;
|
||||
struct attach_context ctx;
|
||||
ctx.ctrlr = &this->ctrlr;
|
||||
ctx.ns = &this->ns;
|
||||
ctx.dev_name = dev_name;
|
||||
ctx.valid = 0;
|
||||
|
||||
spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
|
||||
snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
|
||||
|
||||
rc = spdk_nvme_probe(&trid, reinterpret_cast<void *>(&ctx), probe_cb, attach_cb, nullptr);
|
||||
if (rc != 0) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: failed to probe nvme device: %d\n", rc);
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (ctx.valid != 1) {
|
||||
rc = EINVAL;
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: could not find device: %s\n", dev_name);
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (spdk_nvme_ns_get_csi(this->ns) == SPDK_NVME_CSI_ZNS) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: zoned nvme namespace is unsupported\n");
|
||||
spdk_nvme_detach(this->ctrlr);
|
||||
goto end;
|
||||
} else {
|
||||
spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &this->opts, sizeof(this->opts));
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "birb_nvme_driver: io queue depth: %d io queue requests: %d\n", opts.io_queue_size, opts.io_queue_requests);
|
||||
this->status = BIRB_SUCCESS;
|
||||
}
|
||||
|
||||
end:
|
||||
return;
|
||||
}
|
||||
|
||||
birb_nvme_driver::~birb_nvme_driver()
|
||||
{
|
||||
if (this->ctrlr != nullptr) {
|
||||
spdk_nvme_detach(this->ctrlr);
|
||||
}
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_type
|
||||
birb_nvme_driver::get_type()
|
||||
{
|
||||
return BIRB_DRV_NVME;
|
||||
}
|
||||
|
||||
size_t
|
||||
birb_nvme_driver::get_align()
|
||||
{
|
||||
return 0x1000;
|
||||
}
|
||||
|
||||
spdk_nvme_ctrlr *
|
||||
birb_nvme_driver::get_ctrlr()
|
||||
{
|
||||
return this->ctrlr;
|
||||
}
|
||||
|
||||
spdk_nvme_ns *
|
||||
birb_nvme_driver::get_ns()
|
||||
{
|
||||
return this->ns;
|
||||
}
|
||||
|
||||
spdk_nvme_io_qpair_opts *
|
||||
birb_nvme_driver::get_io_qpair_opts()
|
||||
{
|
||||
return &this->opts;
|
||||
}
|
@ -1,90 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
|
||||
#include "storage/drivers/nvme.hh"
|
||||
#include "ntr.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/nvme.h"
|
||||
#include "spdk/nvme_spec.h"
|
||||
#include "spdk/thread.h"
|
||||
|
||||
birb_nvme_thread_context::birb_nvme_thread_context(birb_nvme_driver * driver) : status(birb_driver::BIRB_FAIL),
|
||||
driver(driver),
|
||||
qpair(nullptr)
|
||||
{
|
||||
struct spdk_nvme_ctrlr * ctrlr = driver->get_ctrlr();
|
||||
struct spdk_nvme_qpair * qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, driver->get_io_qpair_opts(), sizeof(struct spdk_nvme_io_qpair_opts));
|
||||
if (qpair == nullptr) {
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_thread_context: could not allocate qpairs.\n");
|
||||
} else {
|
||||
this->qpair = qpair;
|
||||
status = birb_driver::BIRB_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
birb_driver::birb_driver_status
|
||||
birb_nvme_thread_context::get_status()
|
||||
{
|
||||
return this->status;
|
||||
}
|
||||
|
||||
birb_nvme_thread_context::~birb_nvme_thread_context()
|
||||
{
|
||||
if (this->qpair != nullptr) {
|
||||
spdk_nvme_ctrlr_free_io_qpair(this->qpair);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function for io completion.
|
||||
*/
|
||||
void
|
||||
birb_nvme_thread_context::io_callback(void *arg, const struct spdk_nvme_cpl *completion)
|
||||
{
|
||||
bool success = !spdk_nvme_cpl_is_error(completion);
|
||||
auto ctx = reinterpret_cast<struct cb_context *>(arg);
|
||||
ctx->cb(success, ctx->ctx);
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
birb_nvme_thread_context::size_to_lba(size_t size, int lba_size)
|
||||
{
|
||||
return (size - 1) / lba_size + 1;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
birb_nvme_thread_context::addr_to_lba(size_t addr, int lba_size)
|
||||
{
|
||||
return addr / lba_size;
|
||||
}
|
||||
|
||||
int
|
||||
birb_nvme_thread_context::read(size_t offset, size_t size, char * buffer, callback callback, void * context)
|
||||
{
|
||||
auto ctx = new struct cb_context;
|
||||
ctx->cb = callback;
|
||||
ctx->ctx = context;
|
||||
|
||||
struct spdk_nvme_ns * ns = this->driver->get_ns();
|
||||
int lba_size = spdk_nvme_ns_get_sector_size(ns);
|
||||
return spdk_nvme_ns_cmd_read(ns, this->qpair, buffer, addr_to_lba(offset, lba_size), size_to_lba(size, lba_size), io_callback, reinterpret_cast<void*>(ctx), 0);
|
||||
}
|
||||
|
||||
int
|
||||
birb_nvme_thread_context::write(size_t offset, size_t size, char * buffer, callback callback, void * context)
|
||||
{
|
||||
auto ctx = new struct cb_context;
|
||||
ctx->cb = callback;
|
||||
ctx->ctx = context;
|
||||
|
||||
struct spdk_nvme_ns * ns = this->driver->get_ns();
|
||||
int lba_size = spdk_nvme_ns_get_sector_size(ns);
|
||||
|
||||
return spdk_nvme_ns_cmd_write(ns, this->qpair, buffer, addr_to_lba(offset, lba_size), size_to_lba(size, lba_size), io_callback, reinterpret_cast<void*>(ctx), 0);
|
||||
}
|
||||
|
||||
void
|
||||
birb_nvme_thread_context::poll()
|
||||
{
|
||||
spdk_nvme_qpair_process_completions(this->qpair, 0);
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include <random>
|
||||
|
||||
#include "nm.hh"
|
||||
#include "storage/io_gen.hh"
|
||||
|
||||
io_generator::io_generator(
|
||||
unsigned long req_size,
|
||||
unsigned long capacity,
|
||||
unsigned int read_pct,
|
||||
io_generator_address_mode addr_mode) : cur_offset(0),
|
||||
capacity(capacity),
|
||||
req_size(req_size),
|
||||
read_pct(read_pct),
|
||||
addr_mode(addr_mode),
|
||||
rng(rd()),
|
||||
dist(std::uniform_int_distribution<int>(0, 99)),
|
||||
addr_rng(addr_rd()),
|
||||
addr_dist(std::uniform_int_distribution<uint64_t>(0, capacity - 1))
|
||||
{
|
||||
rng.seed(nm_get_uptime_ns());
|
||||
addr_rng.seed(nm_get_uptime_ns());
|
||||
}
|
||||
|
||||
|
||||
/* returns 0 on success */
|
||||
int io_generator::issue(struct io_generator_ctx *ctx, char * buf)
|
||||
{
|
||||
ctx->size = req_size;
|
||||
|
||||
// determine next IO offset
|
||||
if (addr_mode == IOGEN_ADDR_MONOTONIC_INCREASING) {
|
||||
if (cur_offset + req_size > capacity) {
|
||||
cur_offset = 0;
|
||||
}
|
||||
|
||||
ctx->offset = cur_offset;
|
||||
cur_offset = cur_offset + req_size;
|
||||
} else {
|
||||
ctx->offset = (addr_dist(addr_rng) / req_size) * req_size;
|
||||
if (ctx->offset + req_size > capacity) {
|
||||
ctx->offset -= req_size;
|
||||
}
|
||||
}
|
||||
|
||||
// determine next IO data
|
||||
int op_rng = dist(rng);
|
||||
if (op_rng < (int)read_pct) {
|
||||
ctx->op = IOGEN_READ;
|
||||
} else {
|
||||
ctx->op = IOGEN_WRITE;
|
||||
int data = dist(rng);
|
||||
memset(buf, data, req_size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include "nms.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
void * ret;
|
||||
|
||||
nms_init(1);
|
||||
// duplicate init
|
||||
nms_init(1);
|
||||
|
||||
// 1G
|
||||
ret = nms_malloc(0, 1024 * 1024 * 1024);
|
||||
assert(ret != NULL);
|
||||
printf("1G: %p\n", ret);
|
||||
|
||||
// two 511Ms
|
||||
ret = nms_malloc(0, 511 * 1024 * 1024);
|
||||
assert(ret != NULL);
|
||||
printf("511M: %p\n", ret);
|
||||
ret = nms_malloc(0, 511 * 1024 * 1024);
|
||||
assert(ret != NULL);
|
||||
printf("511M: %p\n", ret);
|
||||
|
||||
// another 1G
|
||||
ret = nms_malloc(0, 1024 * 1024 * 1024);
|
||||
assert(ret != NULL);
|
||||
printf("1G: %p\n", ret);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,239 +0,0 @@
|
||||
#include <sys/endian.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/signal.h>
|
||||
#include "gen.hh"
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "ntr.h"
|
||||
#include "nms.h"
|
||||
#include <getopt.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <topo.h>
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
fprintf(stdout,
|
||||
"Usage:\n"
|
||||
" -v: verbose mode\n"
|
||||
" -b: buffer size\n"
|
||||
" -q: bytes per second\n"
|
||||
" -d: destination domain index\n"
|
||||
" -s: worker threads cpu list\n"
|
||||
" -m: pull mode cpu list\n"
|
||||
" -S: enable shared buffer\n"
|
||||
" -t: time to run\n"
|
||||
" -T: transaction size\n"
|
||||
" -i: inter arrival time distribution\n"
|
||||
" -o: output file path\n"
|
||||
" -H: history size for pct adjustment\n"
|
||||
" -M: print this string when threads are ready to run\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
static char output_file[256] = "memloadgen_samples.txt";
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
ntr_init();
|
||||
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
|
||||
|
||||
size_t arr_sz = 64 * 1024 * 1024;
|
||||
uint32_t time = -1;
|
||||
uint64_t bps = 0;
|
||||
uint64_t transaction_size = arr_sz;
|
||||
cpuset_t threads, modes;
|
||||
char magic[256] = {0};
|
||||
CPU_ZERO(&threads);
|
||||
CPU_ZERO(&modes);
|
||||
CPU_SET(0, &threads);
|
||||
char ia_dist[32] = "fixed";
|
||||
int history_sz = 5;
|
||||
std::list<uint64_t> history;
|
||||
|
||||
int shared_buffer = 0;
|
||||
int rate_ctrl = 0;
|
||||
cpuset_t domain_mask;
|
||||
CPU_ZERO(&domain_mask);
|
||||
CPU_SET(0, &domain_mask);
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while ((c = getopt(argc, argv, "vhb:d:s:m:So:T:t:q:i:H:M:")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1);
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
exit(0);
|
||||
case 'b':
|
||||
arr_sz = strtoull(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'd':
|
||||
cpulist_to_cpuset(optarg, &domain_mask);
|
||||
break;
|
||||
case 's':
|
||||
cpulist_to_cpuset(optarg, &threads);
|
||||
break;
|
||||
case 'm':
|
||||
cpulist_to_cpuset(optarg, &modes);
|
||||
break;
|
||||
case 'S':
|
||||
shared_buffer = 1;
|
||||
break;
|
||||
case 'o':
|
||||
strncpy(output_file, optarg, 256);
|
||||
break;
|
||||
case 't':
|
||||
time = strtoul(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'T':
|
||||
transaction_size = strtoul(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'q':
|
||||
bps = (uint64_t)strtoull(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'i':
|
||||
strncpy(ia_dist, optarg, sizeof(ia_dist));
|
||||
break;
|
||||
case 'H':
|
||||
history_sz = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'M':
|
||||
strncpy(magic, optarg, sizeof(magic));
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "Configruation:\n"
|
||||
" buffer size: %ld\n"
|
||||
" num threads: %d\n"
|
||||
" target domain: %ld\n"
|
||||
" bytes per second: %lu\n"
|
||||
" interarrival distribution: %s\n"
|
||||
" shared buffer: %d\n"
|
||||
" transaction time: %lu\n"
|
||||
" runtime: %d\n"
|
||||
" history: %d\n"
|
||||
" magic: %s\n",
|
||||
arr_sz, CPU_COUNT(&threads),
|
||||
CPU_FFS(&domain_mask) - 1, bps,
|
||||
ia_dist, shared_buffer,
|
||||
transaction_size,time, history_sz, magic);
|
||||
|
||||
// init topo
|
||||
if (topo_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) {
|
||||
fprintf(stderr, "libtopo init failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// init
|
||||
if (nms_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) {
|
||||
fprintf(stderr, "libnms init failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
bool success = false;
|
||||
memload_generator::memload_generator_options opts;
|
||||
opts.buffer_size = arr_sz;
|
||||
opts.trans_per_second = bps / transaction_size;
|
||||
opts.shared_buffer = shared_buffer;
|
||||
opts.transaction_size = transaction_size;
|
||||
opts.verbose = ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT;
|
||||
strncpy(opts.ia_dist, ia_dist, sizeof(opts.ia_dist));
|
||||
std::ofstream ofile;
|
||||
ofile.open(output_file, std::ios::out | std::ios::trunc);
|
||||
|
||||
auto mgen = new memload_generator(&threads, &modes, &domain_mask, &opts, &success);
|
||||
if (strlen(magic) > 0) {
|
||||
fprintf(stdout, "%s\n", magic);
|
||||
fflush(stdout);
|
||||
}
|
||||
if (!mgen->start()) {
|
||||
fprintf(stderr, "failed to start memloadgen!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
struct timeval stval;
|
||||
stval.tv_sec = 0;
|
||||
stval.tv_usec = 0;
|
||||
char pct_line[64] = {0};
|
||||
|
||||
uint64_t prev_ts = topo_uptime_ns();
|
||||
uint64_t prev_trans = mgen->get_transactions();
|
||||
uint32_t cur_time = 0;
|
||||
while(cur_time < time) {
|
||||
usleep(S2US);
|
||||
uint64_t cur_ts = topo_uptime_ns();
|
||||
uint64_t trans = mgen->get_transactions();
|
||||
uint64_t bps = (uint64_t)((double)((trans - prev_trans) * transaction_size) / ((double)(cur_ts - prev_ts) / (double)S2NS));
|
||||
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "%ldB,%ldM\n", bps, bps / 1024 / 1024);
|
||||
ofile << "s," << cur_time << "," << bps << std::endl;
|
||||
ofile.flush();
|
||||
|
||||
prev_ts = cur_ts;
|
||||
prev_trans = trans;
|
||||
cur_time++;
|
||||
|
||||
if (rate_ctrl == 0) {
|
||||
// keep history
|
||||
history.emplace_back(bps);
|
||||
if ((int)history.size() > history_sz) {
|
||||
history.pop_front();
|
||||
}
|
||||
|
||||
fd_set fdset;
|
||||
FD_ZERO(&fdset);
|
||||
FD_SET(STDIN_FILENO, &fdset);
|
||||
int ret = select(1, &fdset, NULL, NULL, &stval);
|
||||
if (ret < 0) {
|
||||
if (errno != EINTR) {
|
||||
fprintf(stderr, "select() failed with %d\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
} else if (ret > 0) {
|
||||
if (FD_ISSET(STDIN_FILENO, &fdset)) {
|
||||
ret = read(STDIN_FILENO, pct_line, sizeof(pct_line) - 1);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "read() failed with %d\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
unsigned int pct = strtoul(pct_line, NULL, 10);
|
||||
uint64_t sum = 0;
|
||||
size_t sz = history.size();
|
||||
while (history.size() > 0) {
|
||||
sum += history.front();
|
||||
history.pop_front();
|
||||
}
|
||||
|
||||
uint64_t newbps = ((sum / sz) * (double)pct / 100.0);
|
||||
mgen->set_transactions(newbps / transaction_size);
|
||||
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "adjusted target bps to %u%% = %ldB ~= %ldM\n", pct, newbps, newbps / 1024 / 1024);
|
||||
|
||||
ofile << "p," << cur_time << "," << pct << std::endl;
|
||||
ofile.flush();
|
||||
|
||||
rate_ctrl = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
mgen->stop();
|
||||
delete mgen;
|
||||
ofile.close();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
237
util/mornafah.c
237
util/mornafah.c
@ -1,237 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "nms.h"
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
#include <topo.h>
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#include <stdatomic.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
|
||||
#define BUFFER_SIZE (128 * 1024 * 1024)
|
||||
#define BUFFER_CNT (BUFFER_SIZE / sizeof(int))
|
||||
|
||||
static _Atomic int flush = 0;
|
||||
static _Atomic uint64_t offset = 0;
|
||||
static int * remote_buffer = NULL;
|
||||
static uint64_t * latencies;
|
||||
static int times = 100;
|
||||
static int local_core = 0;
|
||||
static int remote_core = 1;
|
||||
static int cache_mode = 0;
|
||||
static int verbose = 0;
|
||||
static int random_access = 0;
|
||||
static uint64_t tsc_freq = 0;
|
||||
|
||||
static inline uint64_t cyc2ns(uint64_t cyc)
|
||||
{
|
||||
return (double)cyc / ((double)tsc_freq / 1000000000.0);
|
||||
}
|
||||
|
||||
static inline uint64_t read_time(void)
|
||||
{
|
||||
uint64_t l;
|
||||
unsigned int a;
|
||||
l = __rdtscp(&a);
|
||||
_mm_lfence();
|
||||
return l;
|
||||
}
|
||||
|
||||
static void * local_thread(void *)
|
||||
{
|
||||
int temp, *addr;
|
||||
uint64_t start, end;
|
||||
printf("Local thread running...\n");
|
||||
while(times > 0) {
|
||||
if (random_access) {
|
||||
// change offset
|
||||
offset = (rand() % BUFFER_CNT) * sizeof(int);
|
||||
}
|
||||
|
||||
flush = 1;
|
||||
while(flush != 0) {
|
||||
}
|
||||
|
||||
addr = (int *)((char *)remote_buffer + offset);
|
||||
|
||||
if (verbose > 1) {
|
||||
printf("Local thread(%d): flushing %p.\n", local_core, addr);
|
||||
}
|
||||
|
||||
_mm_clflush(addr);
|
||||
_mm_mfence();
|
||||
|
||||
atomic_signal_fence(memory_order_seq_cst);
|
||||
|
||||
start = read_time();
|
||||
temp = *addr;
|
||||
end = read_time();
|
||||
|
||||
atomic_signal_fence(memory_order_seq_cst);
|
||||
|
||||
if (verbose > 1) {
|
||||
printf("Local thread(%d): read %p.\n", local_core, addr);
|
||||
}
|
||||
|
||||
latencies[times - 1] = end - start;
|
||||
times--;
|
||||
}
|
||||
|
||||
return (void *)(uintptr_t)temp;
|
||||
}
|
||||
|
||||
static void * remote_thread(void *)
|
||||
{
|
||||
int temp;
|
||||
int * addr;
|
||||
printf("Remote thread running...\n");
|
||||
while(1) {
|
||||
while(flush == 0) {
|
||||
}
|
||||
|
||||
addr = (int *)((char *)remote_buffer + offset);
|
||||
|
||||
if(cache_mode) {
|
||||
temp = *addr;
|
||||
_mm_mfence();
|
||||
} else {
|
||||
_mm_clflush(addr);
|
||||
_mm_mfence();
|
||||
}
|
||||
|
||||
if (verbose > 1) {
|
||||
printf("Remote thread(%d): %p %s.\n", remote_core, addr, cache_mode ? "read into cache" : "flushed");
|
||||
}
|
||||
|
||||
flush = 0;
|
||||
}
|
||||
return (void *)(uintptr_t)temp;
|
||||
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
{
|
||||
int c;
|
||||
// parse arguments
|
||||
while ((c = getopt(argc, argv, "l:r:t:vR")) != -1) {
|
||||
switch (c) {
|
||||
case 'l':
|
||||
local_core = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
remote_core = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
times = atoi(optarg);
|
||||
break;
|
||||
case 'R':
|
||||
random_access = 1;
|
||||
break;
|
||||
case 'v':
|
||||
verbose++;
|
||||
break;
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
// init topo
|
||||
if (topo_init(1)) {
|
||||
fprintf(stderr, "libtopo init failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// init
|
||||
if (nms_init(1)) {
|
||||
fprintf(stderr, "libnms init failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t sz = sizeof(tsc_freq);
|
||||
int rc;
|
||||
if ((rc = sysctlbyname("machdep.tsc_freq", &tsc_freq, &sz, NULL, 0)) < 0) {
|
||||
fprintf(stderr,"failed to query tsc frequency via sysctl (%d)\n", errno);
|
||||
} else {
|
||||
fprintf(stdout,"system tsc frequency = %lu\n", tsc_freq);
|
||||
}
|
||||
|
||||
latencies = malloc(sizeof(uint64_t) * times);
|
||||
const int remote_numa = topo_core_to_numa(remote_core);
|
||||
const int local_numa = topo_core_to_numa(local_core);
|
||||
const int total = times;
|
||||
|
||||
remote_buffer = nms_malloc(remote_numa, BUFFER_SIZE);
|
||||
// fill with random values
|
||||
for (int i = 0; i < BUFFER_SIZE; i++) {
|
||||
remote_buffer[i] = rand();
|
||||
}
|
||||
|
||||
pthread_attr_t lattr, rattr;
|
||||
pthread_t lthread, rthread;
|
||||
cpuset_t lcpuset, rcpuset;
|
||||
CPU_ZERO(&lcpuset);
|
||||
CPU_ZERO(&rcpuset);
|
||||
|
||||
CPU_SET(local_core, &lcpuset);
|
||||
CPU_SET(remote_core, &rcpuset);
|
||||
|
||||
pthread_attr_init(&rattr);
|
||||
pthread_attr_setaffinity_np(&rattr, sizeof(cpuset_t), &rcpuset);
|
||||
pthread_attr_init(&lattr);
|
||||
pthread_attr_setaffinity_np(&lattr, sizeof(cpuset_t), &lcpuset);
|
||||
|
||||
printf("local thread: %d numa: %d, remote: %d numa: %d\n", local_core, local_numa, remote_core, remote_numa);
|
||||
pthread_create(<hread, &lattr, local_thread, NULL);
|
||||
pthread_create(&rthread, &rattr, remote_thread, NULL);
|
||||
|
||||
pthread_join(lthread, NULL);
|
||||
|
||||
uint64_t min = UINT64_MAX;
|
||||
uint64_t max = 0;
|
||||
uint64_t sum = 0;
|
||||
for (int i = total - 1; i >= 0; i--) {
|
||||
if (verbose) {
|
||||
printf("%lu,\n", latencies[i]);
|
||||
}
|
||||
if (min > latencies[i]) {
|
||||
min = latencies[i];
|
||||
}
|
||||
if (max < latencies[i]) {
|
||||
max = latencies[i];
|
||||
}
|
||||
sum += latencies[i];
|
||||
}
|
||||
|
||||
double var = 0.0;
|
||||
double avg = (double)sum / (double)total;
|
||||
for (int i = total - 1; i >= 0; i--) {
|
||||
var += pow(latencies[i] - avg, 2);
|
||||
}
|
||||
var = sqrt(var / avg);
|
||||
|
||||
printf("Avg: %lu cycles (%lu ns)\n"
|
||||
"Std: %lu cycles (%lu ns)\n"
|
||||
"Min: %lu cycles (%lu ns)\n"
|
||||
"Max: %lu cycles (%lu ns)\n",
|
||||
(uint64_t)avg, cyc2ns((uint64_t)avg),
|
||||
(uint64_t)var, cyc2ns((uint64_t)var),
|
||||
min, cyc2ns(min),
|
||||
max, cyc2ns(max));
|
||||
|
||||
free(latencies);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user