# NVMf Target Configuration File
#
# Please write all parameters using ASCII.
# The parameter must be quoted if it includes whitespace.
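# For example, a value that contains whitespace is quoted, as in the [Nvme]
# section below:
#   TransportID "trtype:PCIe traddr:0000:00:00.0" Nvme0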
#
# Configuration syntax:
# Leading whitespace is ignored.
# Lines starting with '#' are comments.
# Lines ending with '\' are concatenated with the next line.
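# For illustration only, the following two lines (if uncommented) would be
# read as the single directive "Listen RDMA 192.168.2.21:4420" used in
# [Subsystem2] below:
#   Listen RDMA \
#   192.168.2.21:4420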
# Bracketed ([]) names define sections

[Global]
# Tracepoint group mask for spdk trace buffers
# Default: 0x0 (all tracepoint groups disabled)
# Set to 0xFFFF to enable all tracepoint groups.
#TpointGroupMask 0x0
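# For example, to enable all tracepoint groups as described above:
#TpointGroupMask 0xFFFF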

# PciBlacklist and PciWhitelist cannot be used at the same time
#PciBlacklist 0000:01:00.0
#PciBlacklist 0000:02:00.0
#PciWhitelist 0000:03:00.0
#PciWhitelist 0000:04:00.0

# Users may activate entries in this section to override default values for
# global parameters in the block device (bdev) subsystem.
[Bdev]
# Number of spdk_bdev_io structures allocated in the global bdev subsystem pool.
#BdevIoPoolSize 65536

# Maximum number of spdk_bdev_io structures to cache per thread.
#BdevIoCacheSize 256

# Users may change this section to create a different number or size of
# malloc LUNs.
# This will generate 8 LUNs with a malloc-allocated backend.
# Each LUN will be 64MB in size, and they will be named
# Malloc0 through Malloc7. Not all LUNs defined here are necessarily
# used below.
[Malloc]
NumberOfLuns 8
LunSizeInMB 64

# Users must change this section to match the /dev/sdX devices to be
# exported as NVMe-oF namespaces. The devices are accessed using Linux AIO.
# The format is:
# AIO <file name> <bdev name> [<block size>]
# The file name is the backing device.
# The bdev name can be referenced from elsewhere in the configuration file.
# Block size may be omitted to automatically detect the block size of a disk.
[AIO]
AIO /dev/sdb AIO0
AIO /dev/sdc AIO1
AIO /tmp/myfile AIO2 4096

# PMDK libpmemblk-based block device
[Pmem]
# Syntax:
# Blk <pmemblk pool file name> <bdev name>
Blk /path/to/pmem-pool Pmem0

# Define NVMf protocol global options
[Nvmf]
# Set how often the acceptor polls for incoming connections. The acceptor is also
# responsible for polling existing connections that have gone idle. 0 means poll
# continuously. Units in microseconds.
AcceptorPollRate 10000

# Set how connections are scheduled among multiple threads. Currently supported
# string values are "RoundRobin", "Host", and "Transport".
# RoundRobin: Schedule connections in a round-robin manner.
# Host: Schedule connections according to the host IP.
# Transport: Schedule connections according to the transport characteristics.
# For example, for the TCP transport, connections can be scheduled according to
# socket NAPI_ID info. Connections with the same socket NAPI_ID are grouped into
# the same polling group.
ConnectionScheduler RoundRobin

# One valid transport type must be set in each [Transport].
# The first [Transport] below configures the RDMA transport and the second
# configures the TCP transport.
[Transport]
# Set RDMA transport type.
Type RDMA

# Set the maximum number of outstanding I/O per queue.
#MaxQueueDepth 128

# Set the maximum number of submission and completion queues per session.
# Setting this to '8', for example, allows for 8 submission and 8 completion queues
# per session.
#MaxQueuesPerSession 4

# Set the maximum in-capsule data size. Must be a multiple of 16.
# 0 is a valid choice.
#InCapsuleDataSize 4096

# Set the maximum I/O size. Must be a multiple of 4096.
#MaxIOSize 131072

# Set the I/O unit size. This value should not be larger than MaxIOSize.
#IOUnitSize 131072

# Set the maximum number of outstanding I/O for the admin queue.
#MaxAQDepth 32

# Set the number of pooled data buffers available to the transport.
# They are used to provide the read/write data buffers for the qpairs on this transport.
#NumSharedBuffers 512

# Set the number of shared buffers to be cached per poll group.
#BufCacheSize 32

# Set the maximum number of outstanding I/O per shared receive queue.
# Relevant only for the RDMA transport.
#MaxSRQDepth 4096

[Transport]
# Set TCP transport type.
Type TCP

# Set the maximum number of outstanding I/O per queue.
#MaxQueueDepth 128

# Set the maximum number of submission and completion queues per session.
# Setting this to '8', for example, allows for 8 submission and 8 completion queues
# per session.
#MaxQueuesPerSession 4

# Set the maximum in-capsule data size. Must be a multiple of 16.
# 0 is a valid choice.
#InCapsuleDataSize 4096

# Set the maximum I/O size. Must be a multiple of 4096.
#MaxIOSize 131072

# Set the I/O unit size. This value should not be larger than MaxIOSize.
#IOUnitSize 131072

# Set the maximum number of outstanding I/O for the admin queue.
#MaxAQDepth 32

# Set the number of pooled data buffers available to the transport.
# They are used to provide the read/write data buffers for the qpairs on this transport.
#NumSharedBuffers 512

# Set the number of shared buffers to be cached per poll group.
#BufCacheSize 32

# Set whether to use the C2H Success optimization. Only used for the TCP transport.
# C2HSuccess true

# Define FC transport
#[Transport]
# Set FC transport type.
#Type FC

# Set the maximum number of submission and completion queues per session.
# Setting this to '8', for example, allows for 8 submission and 8 completion queues
# per session.
#MaxQueuesPerSession 5

# Set the maximum number of outstanding I/O per queue.
#MaxQueueDepth 128

# Set the maximum I/O size. Must be a multiple of 4096.
#MaxIOSize 65536

[Nvme]
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their transport id.
# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
# The second argument is the assigned name, which can be referenced from
# other sections in the configuration file. For NVMe devices, a namespace
# is automatically appended to each name in the format <YourName>nY, where
# Y is the NSID (starts at 1).
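# For example, the first namespace of the device claimed as Nvme0 below is
# available as the bdev Nvme0n1 (referenced in [Subsystem1]).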
TransportID "trtype:PCIe traddr:0000:00:00.0" Nvme0
TransportID "trtype:PCIe traddr:0000:01:00.0" Nvme1
TransportID "trtype:PCIe traddr:0000:02:00.0" Nvme2
TransportID "trtype:PCIe traddr:0000:03:00.0" Nvme3
TransportID "trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420 hostaddr:192.168.100.9 subnqn:nqn.2016-06.io.spdk:cnode1" Nvme4
TransportID "trtype:TCP adrfam:IPv4 traddr:192.168.100.3 trsvcid:4420 hostaddr:192.168.100.4 subnqn:nqn.2016-06.io.spdk:cnode2" Nvme5

# The number of attempts per I/O when an I/O fails. Do not include
# this key to get the default behavior.
RetryCount 4
# Timeout for each command, in microseconds. If 0, don't track timeouts.
TimeoutUsec 0
# Action to take on command timeout. Only valid when TimeoutUsec is greater
# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort
# the command, or 'None' to just print a message but do nothing.
# Admin command timeouts will always result in a reset.
ActionOnTimeout None
# Set how often the admin queue is polled for asynchronous events.
# Units in microseconds.
AdminPollRate 100000
# Set how often I/O queues are polled for completions.
# Units in microseconds.
IOPollRate 0

# Handling of hotplug (runtime insert and remove) events is disabled by default;
# set to Yes to enable it.
# Default: No
HotplugEnable No

# The Split virtual block device slices block devices into multiple smaller bdevs.
[Split]
# Syntax:
# Split <bdev> <count> [<size_in_megabytes>]

# Split Malloc2 into two equally-sized portions, Malloc2p0 and Malloc2p1
Split Malloc2 2

# Split Malloc3 into eight 1-megabyte portions, Malloc3p0 ... Malloc3p7,
# leaving the rest of the device inaccessible
Split Malloc3 8 1

# The RAID virtual block device is built on top of pre-configured block devices.
[RAID1]
# Unique name of this RAID device.
Name Raid0
# RAID level; only RAID level 0 is supported.
RaidLevel 0
# Strip size in KB.
StripSize 64
# Number of pre-configured bdevs.
NumDevices 2
# Pre-configured bdev names, using NVMe bdevs.
Devices Nvme2n1 Nvme3n1
# Pre-configured bdev names, using Malloc bdevs.
#Devices Malloc0 Malloc1
# Pre-configured bdev names, using AIO bdevs.
#Devices AIO0 AIO1

# Define an NVMf Subsystem.
# - NQN is required and must be unique.
# - Between 1 and 255 Listen directives are allowed. This defines
# the addresses on which new connections may be accepted. The format
# is Listen <type> <address>, where type can be RDMA or TCP.
# - Between 0 and 255 Host directives are allowed. This defines the
# NQNs of allowed hosts. If no Host directive is specified, all hosts
# are allowed to connect.
# - Between 0 and 255 Namespace directives are allowed. These define the
# namespaces accessible from this subsystem.
# The user must specify MaxNamespaces to allow namespaces to be added while
# connections are active. By default it is 0.
# The user must specify a bdev name for each namespace, and may optionally
# specify a namespace ID. If nsid is omitted, the namespace will be
# assigned the next available NSID. The NSID must be unique within the
# subsystem. An optional namespace UUID may also be specified.
# Syntax:
# Namespace <bdev_name> [<nsid> [<uuid>]]
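# For example, with an explicit NSID and a purely illustrative placeholder UUID:
#   Namespace Malloc0 1 01234567-89ab-cdef-0123-456789abcdef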

# Namespaces backed by physical NVMe devices
[Subsystem1]
NQN nqn.2016-06.io.spdk:cnode1
Listen TCP 15.15.15.2:4420
AllowAnyHost No
Host nqn.2016-06.io.spdk:init
SN SPDK00000000000001
MN SPDK_Controller1
MaxNamespaces 20
Namespace Nvme0n1 1
Namespace Nvme1n1 2
Namespace Raid0

# Multiple subsystems are allowed.
# Namespaces backed by non-NVMe devices
[Subsystem2]
NQN nqn.2016-06.io.spdk:cnode2
Listen RDMA 192.168.2.21:4420
AllowAnyHost No
Host nqn.2016-06.io.spdk:init
SN SPDK00000000000002
MN SPDK_Controller2
Namespace Malloc0
Namespace Malloc1
Namespace AIO0
Namespace AIO1