test/qos: set qos limits to a % of the maximum disk performance

We used to set an arbitrary QoS limit which in some
cases happened to be higher than the actual disk
capabilities. Even though we had an explicit check
for that and skipped the entire QoS test suite if
the device was too slow, disk performance can vary
between runs: it may be just high enough to pass
that initial check, only to dip and fail in the
middle of the test suite. A bdev that maxes out at
21MB/s on one run may just as well do 19MB/s on
another. That is exactly what has been causing
intermittent failures on our CI.

We fix it by removing the arbitrary QoS limit and
instead setting it to a percentage of the disk's
measured maximum performance. Among other things,
this lets us remove the code that skipped the
entire test suite when the disk was too slow. We
definitely don't want to skip any tests.
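
In script form the new flow looks roughly like this
(a minimal sketch; check_qos_works_well, IOPS_RESULT
and BANDWIDTH_RESULT are the helper and variables
from the diff below):

    # Measure first: with enable_limit=false and a limit of 0 the
    # helper effectively just records the unrestrained per-second
    # rates into IOPS_RESULT and BANDWIDTH_RESULT.
    check_qos_works_well false 0 Malloc0
    # Then derive the limits as 50% of whatever this particular
    # disk actually did, so run-to-run variance can no longer
    # drop the unrestrained rate below the limit under test.
    IOPS_LIMIT=$(($IOPS_RESULT/2))
    BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))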

Change-Id: I6de8a183c00bab64484b4ddb12df1dedfbed23f8
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/451887
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: yidong0635 <dongx.yi@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Author:    Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Date:      2019-04-24 13:20:29 +02:00
Committer: Jim Harris
Parent:    992ffd8071
Commit:    22364ca8f7

@@ -26,10 +26,13 @@ function check_qos_works_well() {
     end_io_count=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")
     end_bytes_read=$(jq -r '.bdevs[0].bytes_read' <<< "$iostats")
+    IOPS_RESULT=$(((end_io_count-start_io_count)/5))
+    BANDWIDTH_RESULT=$(((end_bytes_read-start_bytes_read)/5))
     if [ $LIMIT_TYPE = IOPS ]; then
-        read_result=$(((end_io_count-start_io_count)/5))
+        read_result=$IOPS_RESULT
     else
-        read_result=$(((end_bytes_read-start_bytes_read)/5))
+        read_result=$BANDWIDTH_RESULT
     fi
     if [ $enable_limit = true ]; then
@@ -42,14 +45,8 @@ function check_qos_works_well() {
     else
         retval=$(echo "$read_result > $qos_limit" | bc)
         if [ $retval -eq 0 ]; then
-            if [ $check_qos = true ]; then
-                echo "$read_result less than $qos_limit - exit QoS testing"
-                ENABLE_QOS=false
-                exit 0
-            else
-                echo "$read_result less than $qos_limit - expected greater than"
-                exit 1
-            fi
+            echo "$read_result less than $qos_limit - expected greater than"
+            exit 1
         fi
     fi
 }
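
The /5 above converts the raw counter deltas into per-second rates;
the two iostat snapshots are taken around what is presumably a
5-second fio run. A worked example with hypothetical counter values:

    # Hypothetical values pulled from two iostat snapshots 5s apart:
    start_io_count=1000;  end_io_count=101000       # num_read_ops
    start_bytes_read=0;   end_bytes_read=104857600  # bytes_read
    IOPS_RESULT=$(((end_io_count-start_io_count)/5))           # 20000 IO/s
    BANDWIDTH_RESULT=$(((end_bytes_read-start_bytes_read)/5))  # 20971520 B/s, i.e. 20MB/s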
@@ -68,12 +65,8 @@ timing_enter qos
 MALLOC_BDEV_SIZE=64
 MALLOC_BLOCK_SIZE=512
-ENABLE_QOS=true
-IOPS_LIMIT=20000
-BANDWIDTH_LIMIT_MB=20
-BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
-READ_BANDWIDTH_LIMIT_MB=10
-READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))
+IOPS_RESULT=
+BANDWIDTH_RESULT=
 LIMIT_TYPE=IOPS
 rpc_py="$rootdir/scripts/rpc.py"
 fio_py="$rootdir/scripts/fio.py"
@@ -104,37 +97,50 @@ iscsiadm -m node --login -p $TARGET_IP:$ISCSI_PORT
 trap "iscsicleanup; killprocess $pid; iscsitestfini $1 $2; exit 1" SIGINT SIGTERM EXIT
-# Check whether to enable the QoS testing.
-check_qos_works_well false $IOPS_LIMIT Malloc0 true
+# Run FIO without any QOS limits to determine the raw performance
+check_qos_works_well false 0 Malloc0
-if [ $ENABLE_QOS = true ]; then
-    # Limit the I/O rate by RPC, then confirm the observed rate matches.
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
-    check_qos_works_well true $IOPS_LIMIT Malloc0 false
+# Set IOPS/bandwidth limit to 50% of the actual unrestrained performance.
+IOPS_LIMIT=$(($IOPS_RESULT/2))
+BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))
+# Set READ bandwidth limit to 50% of the RW bandwidth limit to be able
+# to differentiate those two.
+READ_BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT/2))
-    # Now disable the rate limiting, and confirm the observed rate is not limited anymore.
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0
-    check_qos_works_well false $IOPS_LIMIT Malloc0 false
+# Also round them down to nearest multiple of either 1000 IOPS or 1MB BW
+# which are the minimal QoS granularities
+IOPS_LIMIT=$(($IOPS_LIMIT/1000*1000))
+BANDWIDTH_LIMIT_MB=$(($BANDWIDTH_LIMIT/1024/1024))
+BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
+READ_BANDWIDTH_LIMIT_MB=$(($READ_BANDWIDTH_LIMIT/1024/1024))
+READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))
-    # Limit the I/O rate again.
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
-    check_qos_works_well true $IOPS_LIMIT Malloc0 false
-    echo "I/O rate limiting tests successful"
+# Limit the I/O rate by RPC, then confirm the observed rate matches.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
+check_qos_works_well true $IOPS_LIMIT Malloc0
-    # Limit the I/O bandwidth rate by RPC, then confirm the observed rate matches.
-    LIMIT_TYPE=BANDWIDTH
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB
-    check_qos_works_well true $BANDWIDTH_LIMIT Malloc0 false
+# Now disable the rate limiting, and confirm the observed rate is not limited anymore.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0
+check_qos_works_well false $IOPS_LIMIT Malloc0
-    # Now disable the bandwidth rate limiting, and confirm the observed rate is not limited anymore.
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec 0
-    check_qos_works_well false $BANDWIDTH_LIMIT Malloc0 false
+# Limit the I/O rate again.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
+check_qos_works_well true $IOPS_LIMIT Malloc0
+echo "I/O rate limiting tests successful"
-    # Limit the I/O bandwidth rate again with both read/write and read/only.
-    $rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB --r_mbytes_per_sec $READ_BANDWIDTH_LIMIT_MB
-    check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0 false
-    echo "I/O bandwidth limiting tests successful"
-fi
+# Limit the I/O bandwidth rate by RPC, then confirm the observed rate matches.
+LIMIT_TYPE=BANDWIDTH
+$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB
+check_qos_works_well true $BANDWIDTH_LIMIT Malloc0
+# Now disable the bandwidth rate limiting, and confirm the observed rate is not limited anymore.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec 0
+check_qos_works_well false $BANDWIDTH_LIMIT Malloc0
+# Limit the I/O bandwidth rate again with both read/write and read/only.
+$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB --r_mbytes_per_sec $READ_BANDWIDTH_LIMIT_MB
+check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0
+echo "I/O bandwidth limiting tests successful"
 iscsicleanup
 $rpc_py delete_target_node 'iqn.2016-06.io.spdk:Target1'
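
To sanity-check the limit arithmetic above, here is a standalone
sketch with made-up measurements (21MB/s and 43k IOPS are
hypothetical, not CI numbers) showing how the final limits fall out,
including the rounding down to the minimal QoS granularities of
1000 IOPS and 1MB:

    # Made-up unrestrained results:
    IOPS_RESULT=43000
    BANDWIDTH_RESULT=$((21*1024*1024))            # 21MB/s

    # 50% of the measured maximum.
    IOPS_LIMIT=$(($IOPS_RESULT/2))                # 21500
    BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))      # 11010048 (10.5MB/s)
    READ_BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT/2))  # 5505024 (5.25MB/s)

    # Round down to the minimal QoS granularities.
    IOPS_LIMIT=$(($IOPS_LIMIT/1000*1000))                         # 21000
    BANDWIDTH_LIMIT_MB=$(($BANDWIDTH_LIMIT/1024/1024))            # 10
    BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))            # 10485760
    READ_BANDWIDTH_LIMIT_MB=$(($READ_BANDWIDTH_LIMIT/1024/1024))  # 5
    READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))  # 5242880

    echo "$IOPS_LIMIT IOPS, ${BANDWIDTH_LIMIT_MB}MB/s RW, ${READ_BANDWIDTH_LIMIT_MB}MB/s READ"

The integer division in bash makes the MB rounding free: 10.5MB/s
truncates to 10MB/s, which is then converted back to bytes for the
comparison in check_qos_works_well.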