323 lines
9.2 KiB
Python
Raw Normal View History

def set_nvmf_target_max_subsystems(client,
                                   max_subsystems=None):
    """Set the maximum number of subsystems for the NVMe-oF target.

    Args:
        client: RPC client object exposing call(method, params).
        max_subsystems: Maximum number of NVMe-oF subsystems (e.g. 1024).
            The value is always forwarded, even when left at None.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = {'max_subsystems': max_subsystems}
    return client.call('set_nvmf_target_max_subsystems', request)
def set_nvmf_target_config(client,
                           acceptor_poll_rate=None,
                           conn_sched=None):
    """Set the NVMe-oF target subsystem configuration.

    Only options with a truthy value are included in the request.

    Args:
        client: RPC client object exposing call(method, params).
        acceptor_poll_rate: Acceptor poll period in microseconds (optional).
        conn_sched: Scheduling of incoming connections (optional).

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    candidates = (
        ('acceptor_poll_rate', acceptor_poll_rate),
        ('conn_sched', conn_sched),
    )
    request = {}
    for key, value in candidates:
        if value:
            request[key] = value
    return client.call('set_nvmf_target_config', request)
def nvmf_create_transport(client,
                          trtype,
                          max_queue_depth=None,
                          max_qpairs_per_ctrlr=None,
                          in_capsule_data_size=None,
                          max_io_size=None,
                          io_unit_size=None,
                          max_aq_depth=None,
                          num_shared_buffers=None,
                          buf_cache_size=None,
                          max_srq_depth=None,
                          no_srq=False,
                          c2h_success=True,
                          dif_insert_or_strip=None):
    """NVMf Transport Create options.

    Args:
        client: RPC client object exposing call(method, params).
        trtype: Transport type (ex. RDMA)
        max_queue_depth: Max number of outstanding I/O per queue (optional)
        max_qpairs_per_ctrlr: Max number of SQ and CQ per controller (optional)
        in_capsule_data_size: Maximum in-capsule data size in bytes (optional)
        max_io_size: Maximum I/O data size in bytes (optional)
        io_unit_size: I/O unit size in bytes (optional)
        max_aq_depth: Max size admin queue per controller (optional)
        num_shared_buffers: The number of pooled data buffers available to the transport (optional)
        buf_cache_size: The number of shared buffers to reserve for each poll group (optional)
        max_srq_depth: Max number of outstanding I/O per shared receive queue - RDMA specific (optional)
        no_srq: Boolean flag to disable SRQ even for devices that support it - RDMA specific (optional)
        c2h_success: Boolean flag to enable/disable the C2H success optimization - TCP specific (optional).
            Forwarded whenever it is not None, so an explicit False reaches the target.
        dif_insert_or_strip: Boolean flag to enable DIF insert/strip for I/O - TCP specific (optional)

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    params = {'trtype': trtype}

    # Options that are only forwarded when the caller supplied a truthy value.
    truthy_only = (
        ('max_queue_depth', max_queue_depth),
        ('max_qpairs_per_ctrlr', max_qpairs_per_ctrlr),
        ('in_capsule_data_size', in_capsule_data_size),
        ('max_io_size', max_io_size),
        ('io_unit_size', io_unit_size),
        ('max_aq_depth', max_aq_depth),
        ('num_shared_buffers', num_shared_buffers),
        ('buf_cache_size', buf_cache_size),
        ('max_srq_depth', max_srq_depth),
        ('no_srq', no_srq),
    )
    for key, value in truthy_only:
        if value:
            params[key] = value

    # c2h_success defaults to True, so a plain truthiness test would drop an
    # explicit False and the caller could never disable the optimization via
    # this wrapper. Only omit it when the caller passes None.
    if c2h_success is not None:
        params['c2h_success'] = c2h_success

    if dif_insert_or_strip:
        params['dif_insert_or_strip'] = dif_insert_or_strip

    return client.call('nvmf_create_transport', params)
def get_nvmf_transports(client):
    """Fetch the list of NVMe-oF transports.

    Args:
        client: RPC client object exposing call(method).

    Returns:
        List of NVMe-oF transport objects, as returned by client.call.
    """
    transports = client.call('get_nvmf_transports')
    return transports
def get_nvmf_subsystems(client):
    """Fetch the list of NVMe-oF subsystems.

    Args:
        client: RPC client object exposing call(method).

    Returns:
        List of NVMe-oF subsystem objects, as returned by client.call.
    """
    subsystems = client.call('get_nvmf_subsystems')
    return subsystems
def nvmf_subsystem_create(client,
                          nqn,
                          serial_number,
                          model_number='SPDK bdev Controller',
                          allow_any_host=False,
                          max_namespaces=0):
    """Construct an NVMe over Fabrics target subsystem.

    The NQN is always sent; every other field is added to the request only
    when its value is truthy.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        serial_number: Serial number of virtual controller.
        model_number: Model number of virtual controller.
        allow_any_host: Allow any host (True) or enforce allowed host whitelist (False). Default: False.
        max_namespaces: Maximum number of namespaces that can be attached to the subsystem (optional).
            Default: 0 (Unlimited).

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = {'nqn': nqn}

    identity_fields = (
        ('serial_number', serial_number),
        ('model_number', model_number),
    )
    for key, value in identity_fields:
        if value:
            request[key] = value

    if allow_any_host:
        # Normalised to a real boolean regardless of the truthy value given.
        request['allow_any_host'] = True
    if max_namespaces:
        request['max_namespaces'] = max_namespaces

    return client.call('nvmf_subsystem_create', request)
def nvmf_subsystem_add_listener(client, nqn, trtype, traddr, trsvcid, adrfam):
    """Add a new listen address to an NVMe-oF subsystem.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        trtype: Transport type ("RDMA").
        traddr: Transport address.
        trsvcid: Transport service ID.
        adrfam: Address family ("IPv4", "IPv6", "IB", or "FC"); left out of
            the request when falsy.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    address = dict(trtype=trtype, traddr=traddr, trsvcid=trsvcid)
    if adrfam:
        address['adrfam'] = adrfam

    request = dict(nqn=nqn, listen_address=address)
    return client.call('nvmf_subsystem_add_listener', request)
def nvmf_subsystem_remove_listener(
        client,
        nqn,
        trtype,
        traddr,
        trsvcid,
        adrfam):
    """Remove an existing listen address from an NVMe-oF subsystem.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        trtype: Transport type ("RDMA").
        traddr: Transport address.
        trsvcid: Transport service ID.
        adrfam: Address family ("IPv4", "IPv6", "IB", or "FC"); left out of
            the request when falsy.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = {
        'nqn': nqn,
        'listen_address': {
            'trtype': trtype,
            'traddr': traddr,
            'trsvcid': trsvcid,
        },
    }
    if adrfam:
        request['listen_address']['adrfam'] = adrfam
    return client.call('nvmf_subsystem_remove_listener', request)
def nvmf_subsystem_add_ns(client, nqn, bdev_name, nsid=None, nguid=None, eui64=None, uuid=None):
    """Add a namespace to a subsystem.

    The bdev name is always part of the namespace description; the
    identifier fields are included only when they are truthy.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        bdev_name: Name of bdev to expose as a namespace.
        nsid: Namespace ID (optional).
        nguid: 16-byte namespace globally unique identifier in hexadecimal (optional).
        eui64: 8-byte namespace EUI-64 in hexadecimal (e.g. "ABCDEF0123456789") (optional).
        uuid: Namespace UUID (optional).

    Returns:
        The namespace ID, as returned by client.call.
    """
    identifiers = (
        ('nsid', nsid),
        ('nguid', nguid),
        ('eui64', eui64),
        ('uuid', uuid),
    )
    namespace = {'bdev_name': bdev_name}
    namespace.update((key, value) for key, value in identifiers if value)

    return client.call('nvmf_subsystem_add_ns',
                       {'nqn': nqn, 'namespace': namespace})
def nvmf_subsystem_remove_ns(client, nqn, nsid):
    """Remove an existing namespace from a subsystem.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        nsid: Namespace ID.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = dict(nqn=nqn, nsid=nsid)
    return client.call('nvmf_subsystem_remove_ns', request)
def nvmf_subsystem_add_host(client, nqn, host):
    """Add a host NQN to the whitelist of allowed hosts.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        host: Host NQN to add to the list of allowed host NQNs.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = dict(nqn=nqn, host=host)
    return client.call('nvmf_subsystem_add_host', request)
def nvmf_subsystem_remove_host(client, nqn, host):
    """Remove a host NQN from the whitelist of allowed hosts.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        host: Host NQN to remove from the list of allowed host NQNs.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = dict(nqn=nqn, host=host)
    return client.call('nvmf_subsystem_remove_host', request)
def nvmf_subsystem_allow_any_host(client, nqn, disable):
    """Configure a subsystem to allow any host to connect or to enforce the host NQN whitelist.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.
        disable: When truthy, allow-any-host is turned off and the allowed
            host whitelist is enforced; when falsy, any host is allowed.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    # The RPC field has the opposite polarity of the "disable" argument.
    params = {'nqn': nqn, 'allow_any_host': not disable}
    return client.call('nvmf_subsystem_allow_any_host', params)
def delete_nvmf_subsystem(client, nqn):
    """Delete an existing NVMe-oF subsystem.

    Args:
        client: RPC client object exposing call(method, params).
        nqn: Subsystem NQN.

    Returns:
        The RPC result returned by client.call (expected to be True or False).
    """
    request = dict(nqn=nqn)
    return client.call('delete_nvmf_subsystem', request)