2016-02-19 21:11:08 +00:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
# Abort on the first command that fails without being explicitly handled.
set -e

# Repository root: the parent of the directory that contains this script.
# Every expansion is quoted so that a checkout path containing whitespace
# still resolves to a single path.
rootdir=$(readlink -f "$(dirname "$0")")/..

# NOTE(review): common.sh presumably provides iter_pci_class_code and
# iter_pci_dev_id, which are used below but not defined in this file - confirm.
source "$rootdir/scripts/common.sh"
|
2016-02-19 21:11:08 +00:00
|
|
|
|
2018-01-15 18:49:30 +00:00
|
|
|
# Print the help text and terminate the script.
#
# Arguments:
#   $1 - path the script was invoked as; only its basename is printed
#   $2 - optional message (e.g. an error) printed before the help text
# Exits with status 0 in every case, including when $2 reports an error.
function usage() {
	local os options

	# Query the platform once; it decides which modes are advertised.
	os=$(uname)

	if [ "$os" = Linux ]; then
		options="[config|reset|status|cleanup|help]"
	else
		options="[config|reset|help]"
	fi

	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT and Virtio devices to"
	echo "a generic VFIO kernel driver. If VFIO is not available on the system, this script will"
	echo "fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
	# Quote $1 so a script path containing whitespace still yields its basename.
	echo "Usage: $(basename "$1") $options"
	echo
	echo "$options - as following:"
	echo "config Default mode. Allocate hugepages and bind PCI devices."
	if [ "$os" = Linux ]; then
		echo "cleanup Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset Rebind PCI devices back to their original drivers."
	echo " Also cleanup any leftover spdk files/resources."
	echo " Hugepage memory size will remain unchanged."
	if [ "$os" = Linux ]; then
		echo "status Print status of all SPDK-compatible devices on the system."
	fi
	echo "help Print this help message."
	echo
	echo "The following environment variables can be specified."
	echo "HUGEMEM Size of hugepage memory to allocate (in MB). 2048 by default."
	echo " For NUMA systems, the hugepages will be evenly distributed"
	echo " between CPU nodes"
	echo "NRHUGE Number of hugepages to allocate. This variable overwrites HUGEMEM."
	echo "HUGENODE Specific NUMA node to allocate hugepages on. To allocate"
	echo " hugepages on multiple nodes run this script multiple times -"
	echo " once for each node."
	echo "PCI_WHITELIST"
	echo "PCI_BLACKLIST Whitespace separated list of PCI devices (NVMe, I/OAT, Virtio)."
	echo " Each device must be specified as a full PCI address."
	echo " E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
	echo " To blacklist all PCI devices use a non-valid address."
	echo " E.g. PCI_WHITELIST=\"none\""
	echo " If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
	echo " will be bound."
	echo " Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
	echo " PCI_BLACKLIST has precedence over PCI_WHITELIST."
	echo "TARGET_USER User that will own hugepage mountpoint directory and vfio groups."
	echo " By default the current user will be used."
	echo "DRIVER_OVERRIDE Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo " bind devices to the given driver."
	echo " E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=vfio-pci"
	exit 0
}
|
|
|
|
|
2018-03-02 01:43:20 +00:00
|
|
|
# Report whether a kernel driver is present on the running system.
#
# In monolithic kernels lsmod won't list the driver, so the lsmod lookup is
# backed by a /sys/module check. A different code is returned for built-in vs
# module just in case we want that down the road.
#
# The name is compared against the first lsmod column exactly (not as a
# substring of the whole line), with dashes treated as underscores.
#
# Arguments:
#   $1 - driver name, e.g. "nvme" or "vfio-pci"
# Returns:
#   1 - driver is listed by lsmod
#   2 - driver is not listed by lsmod but has a /sys/module entry
#   0 - driver was not found
# Callers running under "set -e" must capture the status explicitly
# (e.g. "check_for_driver x || rc=$?"), because non-zero means "found".
function check_for_driver {
	local wanted=${1//-/_}

	# NR > 1 skips the "Module Size Used by" header line.
	if lsmod | awk -v mod="$wanted" 'NR > 1 && $1 == mod { found = 1 } END { exit !found }'; then
		return 1
	fi

	if [[ -d /sys/module/$1 || -d /sys/module/$wanted ]]; then
		return 2
	fi

	return 0
}
|
|
|
|
|
2018-01-23 14:07:10 +00:00
|
|
|
# Decide whether this script may touch the given PCI device.
#
# Globals:
#   PCI_BLACKLIST (read) - whitespace separated PCI addresses to leave alone
#   PCI_WHITELIST (read) - whitespace separated PCI addresses to allow; an
#                          empty value allows every device
# Arguments:
#   $1 - full PCI address (e.g. 0000:01:00.0)
# Returns:
#   0 if the device may be bound, 1 otherwise. The blacklist has precedence
#   over the whitelist.
function pci_can_bind() {
	local bdf=$1
	local entry

	# Both lists are split on any whitespace (spaces, tabs, newlines) and
	# compared literally, so "." in an address is not a wildcard.
	for entry in $PCI_BLACKLIST; do
		if [[ "$entry" == "$bdf" ]]; then
			return 1
		fi
	done

	if [[ -z "$PCI_WHITELIST" ]]; then
		# no whitelist specified, bind all devices
		return 0
	fi

	for entry in $PCI_WHITELIST; do
		if [[ "$entry" == "$bdf" ]]; then
			return 0
		fi
	done

	return 1
}
|
|
|
|
|
2016-04-14 19:21:32 +00:00
|
|
|
# Bind a PCI device to the given kernel driver through sysfs.
#
# If the device is already bound to that driver nothing happens. Otherwise it
# is unbound from its current driver (if any), its vendor/device ID is
# registered with the new driver and a bind is requested. Failures of the
# new_id/bind writes are deliberately ignored (best effort). When a VFIO
# group node exists for the device and TARGET_USER is set, the node is
# chown'ed to that user.
#
# All working variables are local, so the caller's variables of the same name
# (notably driver_name and bdf) are left untouched.
#
# Globals:
#   TARGET_USER (read)
# Arguments:
#   $1 - full PCI address of the device
#   $2 - name of the driver to bind to
# Outputs:
#   One "<bdf> (<vendor> <device>): <old driver> -> <new driver>" line.
function linux_bind_driver() {
	local bdf="$1"
	local driver_name="$2"
	local old_driver_name="no driver"
	local ven_dev_id iommu_group

	# "vvvv:dddd" from lspci -n becomes "vvvv dddd", the format new_id expects.
	ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

	if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
		old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")

		if [ "$driver_name" = "$old_driver_name" ]; then
			return 0
		fi

		# Not every driver has a dynamic ID to remove; ignore that failure.
		echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
		echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	fi

	echo "$bdf ($ven_dev_id): $old_driver_name -> $driver_name"

	echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
	echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

	iommu_group=$(basename "$(readlink -f "/sys/bus/pci/devices/$bdf/iommu_group")")
	# An empty group name would make the path below resolve to /dev/vfio itself.
	if [ -n "$iommu_group" ] && [ -e "/dev/vfio/$iommu_group" ] && [ -n "$TARGET_USER" ]; then
		chown "$TARGET_USER" "/dev/vfio/$iommu_group"
	fi
}
|
|
|
|
|
2017-10-13 15:32:39 +00:00
|
|
|
# Unbind a PCI device from whatever kernel driver currently owns it.
#
# Does nothing (and returns 0) when the device has no driver. Working
# variables are local so the caller's variables are not overwritten.
#
# Arguments:
#   $1 - full PCI address of the device
# Outputs:
#   One "<bdf> (<vendor> <device>): <old driver> -> no driver" line when a
#   driver was unbound.
function linux_unbind_driver() {
	local bdf="$1"
	local ven_dev_id old_driver_name

	if ! [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
		return 0
	fi

	# "vvvv:dddd" from lspci -n becomes "vvvv dddd", the format remove_id expects.
	ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')
	old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")

	# Not every driver has a dynamic ID to remove; ignore that failure.
	echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
	echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	echo "$bdf ($ven_dev_id): $old_driver_name -> no driver"
}
|
|
|
|
|
2018-01-23 13:09:58 +00:00
|
|
|
# Print the mount point (third field of the mount listing) of every mounted
# hugetlbfs filesystem, one per line. Prints nothing when there is none.
function linux_hugetlbfs_mounts() {
	mount | awk '/ type hugetlbfs / { print $3 }'
}
|
|
|
|
|
2017-10-13 17:19:43 +00:00
|
|
|
# Look up the kernel block device name (e.g. nvme0n1) that belongs to the
# NVMe controller at a given PCI address.
#
# Arguments:
#   $1 - full PCI address of the controller
#   $2 - name of the caller's variable that receives the block device name.
#        It is left untouched when no device matches. It must not be one of
#        this function's local names (nvme_devs, nvme_dev, link_name).
# Returns:
#   0 always. The shell's errexit setting is left exactly as the caller had it.
function get_nvme_name_from_bdf {
	local nvme_devs nvme_dev link_name

	# grep exits non-zero when there are no NVMe block devices; not an error.
	nvme_devs=$(lsblk -d --output NAME | grep "^nvme") || true

	for nvme_dev in $nvme_devs; do
		# The PCI device link is tried at device/device first and at
		# device as a fallback.
		link_name=$(readlink "/sys/block/$nvme_dev/device/device") || true
		if [ -z "$link_name" ]; then
			link_name=$(readlink "/sys/block/$nvme_dev/device") || continue
		fi

		if [ "$(basename "$link_name")" = "$1" ]; then
			# Assign to the caller's variable without eval.
			printf -v "$2" '%s' "$nvme_dev"
			return 0
		fi
	done

	return 0
}
|
|
|
|
|
2018-01-05 08:54:39 +00:00
|
|
|
# Collect the block devices whose /sys/block symlink target contains a given
# PCI address.
#
# Arguments:
#   $1 - full PCI address of the virtio device (matched as a literal string)
#   $2 - name of the caller's variable that receives the result: every
#        matching device name, each preceded by a single space (empty when
#        nothing matches). It must not be one of this function's local names
#        (blk_devs, blk_dev, virtio_names).
function get_virtio_names_from_bdf {
	local blk_devs blk_dev
	local virtio_names=''

	blk_devs=$(lsblk --nodeps --output NAME)

	for blk_dev in $blk_devs; do
		# -F: the address is a fixed string, "." must not match any character.
		if readlink "/sys/block/$blk_dev" | grep -q -F -- "$1"; then
			virtio_names="$virtio_names $blk_dev"
		fi
	done

	# Assign to the caller's variable without eval.
	printf -v "$2" '%s' "$virtio_names"
}
|
|
|
|
|
2017-11-14 20:05:47 +00:00
|
|
|
# Bind every permitted NVMe, I/OAT and virtio PCI device to a userspace I/O
# driver.
#
# Driver selection: DRIVER_OVERRIDE if set, otherwise vfio-pci, falling back
# to uio_pci_generic when /sys/kernel/iommu_groups is empty or missing.
# Devices rejected by pci_can_bind are skipped, and so are NVMe/virtio devices
# that back a block device with an active mountpoint.
#
# Globals:
#   DRIVER_OVERRIDE (read), rootdir (read)
#   driver_name (written) - left global on purpose: configure_linux reads it
#                           after this function returns
function configure_linux_pci {
	local pci_ids_header="$rootdir/include/spdk/pci_ids.h"
	local bdf blkname blknames dev_id dev_ids mountpoints

	if [ -z "${DRIVER_OVERRIDE}" ]; then
		driver_name=vfio-pci
		if [ -z "$(ls /sys/kernel/iommu_groups 2> /dev/null)" ]; then
			# No IOMMU. Use uio.
			driver_name=uio_pci_generic
		fi
	else
		driver_name="${DRIVER_OVERRIDE}"
	fi

	modprobe "$driver_name"

	# NVMe
	for bdf in $(iter_pci_class_code 01 08 02); do
		blkname=''
		get_nvme_name_from_bdf "$bdf" blkname
		if ! pci_can_bind "$bdf"; then
			echo "Skipping un-whitelisted NVMe controller $blkname ($bdf)"
			continue
		fi
		if [ "$blkname" != "" ]; then
			mountpoints=$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)
		else
			mountpoints="0"
		fi
		if [ "$mountpoints" = "0" ]; then
			linux_bind_driver "$bdf" "$driver_name"
		else
			echo "Active mountpoints on /dev/$blkname, so not binding PCI dev $bdf"
		fi
	done

	# IOAT
	# Collect all the device_id info of ioat devices. The IDs are kept in a
	# variable rather than a temp file, so nothing is left behind if a
	# later command fails under "set -e".
	dev_ids=$(grep "PCI_DEVICE_ID_INTEL_IOAT" "$pci_ids_header" \
		| awk -F"x" '{print $2}')

	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
			if ! pci_can_bind "$bdf"; then
				echo "Skipping un-whitelisted I/OAT device at $bdf"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
		done
	done

	# virtio
	# Collect all the device_id info of virtio devices.
	dev_ids=$(grep "PCI_DEVICE_ID_VIRTIO" "$pci_ids_header" \
		| awk -F"x" '{print $2}')

	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 1af4 "$dev_id"); do
			if ! pci_can_bind "$bdf"; then
				echo "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			blknames=''
			get_virtio_names_from_bdf "$bdf" blknames
			for blkname in $blknames; do
				if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
					echo "Active mountpoints on /dev/$blkname, so not binding PCI dev $bdf"
					# Skip this PCI device altogether (continue the bdf loop).
					continue 2
				fi
			done

			linux_bind_driver "$bdf" "$driver_name"
		done
	done

	echo "1" > "/sys/bus/pci/rescan"
}
|
|
|
|
|
2018-07-17 19:43:33 +00:00
|
|
|
# Remove files left behind by SPDK applications that are no longer running.
#
# Candidates are:
#   - entries of the spdk<N> / spdk_pid<N> directories found under
#     /var/run/dpdk, /tmp/dpdk and $XDG_RUNTIME_DIR/dpdk (the latter only if
#     XDG_RUNTIME_DIR is a directory whose path contains no space), followed
#     by the directories themselves;
#   - /dev/shm entries whose path matches the trace / iscsi-connection
#     patterns below.
# All paths are canonicalised with "readlink -e" and compared literally, one
# full line at a time, against the canonical targets of every /proc/<pid>/fd
# entry. A candidate that some process still has open is reported and kept.
#
# Outputs: progress messages on stdout, a warning on stderr when a directory
#          cannot be removed.
# Exits the script with status 1 when no open file could be listed at all.
function cleanup_linux {
	local -a search_roots=(/var/run/dpdk /tmp/dpdk)
	local -a dirs_to_clean=()
	local -a files_to_clean=()
	local dir_re='^spdk(_pid)?[0-9]+$'
	local shm_re='(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevtest|bdevperf)_trace|spdk_iscsi_conns'
	local opened_files=""
	local root dir path fd_dir fds f

	if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
		search_roots+=("$XDG_RUNTIME_DIR/dpdk")
	fi

	# An unmatched glob stays literal; it then fails the name check or
	# "readlink -e" and is skipped, so no shopt changes are needed.
	for root in "${search_roots[@]}"; do
		for dir in "$root"/spdk*; do
			[[ ${dir##*/} =~ $dir_re ]] || continue
			dir=$(readlink -e "$dir") || continue
			dirs_to_clean+=("$dir")
		done
	done

	for dir in "${dirs_to_clean[@]}"; do
		for path in "$dir"/*; do
			path=$(readlink -e "$path") || continue
			files_to_clean+=("$path")
		done
	done

	for path in /dev/shm/*; do
		[[ $path =~ $shm_re ]] || continue
		path=$(readlink -e "$path") || continue
		files_to_clean+=("$path")
	done

	if ((${#files_to_clean[@]} == 0)); then
		echo "Clean"
		return 0
	fi

	# readlink exits non-zero when any fd does not resolve to an existing
	# path (sockets, pipes, ...) but still prints the ones that do. Each
	# per-process list is terminated with a newline so that the last path
	# of one process is never fused with the first path of the next.
	for fd_dir in /proc/[0-9]*/fd; do
		fds=$(readlink -e "$fd_dir"/* 2> /dev/null) || true
		if [[ -n $fds ]]; then
			opened_files+="$fds"$'\n'
		fi
	done

	if [[ -z "$opened_files" ]]; then
		echo "Can't get list of opened files!"
		exit 1
	fi

	echo 'Cleaning'
	for f in "${files_to_clean[@]}"; do
		# Already gone, e.g. reached through two different search roots.
		[[ -e $f ]] || continue
		# -F -x: literal, whole-line comparison; paths are not regexes.
		if grep -Fxq -- "$f" <<< "$opened_files"; then
			echo "Still open: $f"
		else
			echo "Removing: $f"
			rm -- "$f"
		fi
	done

	for dir in "${dirs_to_clean[@]}"; do
		[[ -e $dir ]] || continue
		if grep -Fxq -- "$dir" <<< "$opened_files"; then
			echo "Still open: $dir"
		else
			echo "Removing: $dir"
			# A directory that still holds an open (kept) file cannot be
			# removed; report it instead of aborting the whole script.
			rmdir -- "$dir" || echo "Could not remove: $dir" >&2
		fi
	done
	echo "Clean"
}
|
|
|
|
|
2017-11-14 20:05:47 +00:00
|
|
|
# Linux "config" mode: bind the PCI devices, make sure a hugetlbfs mount
# exists, request NRHUGE hugepages (system-wide, or on node HUGENODE when it
# is set) and verify the allocation. When the selected driver is vfio-pci the
# hugetlbfs mounts are handed to TARGET_USER (if set) and the memlock limit of
# the current shell is reported.
#
# Exits the script with status 1 when fewer hugepages than requested could be
# allocated.
function configure_linux {
	configure_linux_pci

	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)
	if [[ -z $hugetlbfs_mounts ]]; then
		# Nothing mounted yet: create and mount the default location.
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	# Per-node sysfs knob when a NUMA node was requested, global knob otherwise.
	if [[ -n $HUGENODE ]]; then
		hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
	else
		hugepages_target="/proc/sys/vm/nr_hugepages"
	fi

	echo "$NRHUGE" > "$hugepages_target"

	# Read the value back to see how many pages were actually reserved.
	allocated_hugepages=$(cat "$hugepages_target")
	if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
		echo ""
		echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
		echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
		exit 1
	fi

	# Everything below only applies to VFIO.
	if [ "$driver_name" != "vfio-pci" ]; then
		return 0
	fi

	if [[ -n $TARGET_USER ]]; then
		for mount in $hugetlbfs_mounts; do
			chown "$TARGET_USER" "$mount"
			chmod g+w "$mount"
		done
	fi

	MEMLOCK_AMNT=$(ulimit -l)
	if [ "$MEMLOCK_AMNT" = "unlimited" ]; then
		return 0
	fi

	MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
	echo ""
	echo "Current user memlock limit: ${MEMLOCK_MB} MB"
	echo ""
	echo "This is the maximum amount of memory you will be"
	echo "able to use with DPDK and VFIO if run as current user."
	echo -n "To change this, please adjust limits.conf memlock "
	echo "limit for current user."

	if [ "$MEMLOCK_AMNT" -lt 65536 ]; then
		echo ""
		echo "## WARNING: memlock limit is less than 64MB"
		echo -n "## DPDK with VFIO may not be able to initialize "
		echo "if run as current user."
	fi
}
|
|
|
|
|
2017-11-14 20:05:47 +00:00
|
|
|
# Give every permitted NVMe, I/OAT and virtio PCI device back to its regular
# kernel driver.
#
# NVMe and I/OAT devices are rebound to nvme / ioatdma when check_for_driver
# reports the driver as present, and merely unbound otherwise. Virtio devices
# are always rebound to virtio-pci after a best-effort modprobe. Devices
# rejected by pci_can_bind are skipped.
#
# Globals:
#   rootdir (read)
function reset_linux_pci {
	local pci_ids_header="$rootdir/include/spdk/pci_ids.h"
	local bdf blkname dev_id dev_ids driver_loaded

	# NVMe
	# check_for_driver returns non-zero when the driver IS present, so its
	# status is captured instead of being left to "set -e".
	driver_loaded=0
	check_for_driver nvme || driver_loaded=$?
	for bdf in $(iter_pci_class_code 01 08 02); do
		if ! pci_can_bind "$bdf"; then
			# Resolve the block device name for the message; it is empty
			# when the controller has no kernel block device.
			blkname=''
			get_nvme_name_from_bdf "$bdf" blkname
			echo "Skipping un-whitelisted NVMe controller $blkname ($bdf)"
			continue
		fi
		if [ "$driver_loaded" -ne 0 ]; then
			linux_bind_driver "$bdf" nvme
		else
			linux_unbind_driver "$bdf"
		fi
	done

	# IOAT
	# Collect all the device_id info of ioat devices. The IDs are kept in a
	# variable rather than a temp file, so nothing is left behind if a
	# later command fails under "set -e".
	dev_ids=$(grep "PCI_DEVICE_ID_INTEL_IOAT" "$pci_ids_header" \
		| awk -F"x" '{print $2}')

	driver_loaded=0
	check_for_driver ioatdma || driver_loaded=$?
	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
			if ! pci_can_bind "$bdf"; then
				echo "Skipping un-whitelisted I/OAT device at $bdf"
				continue
			fi
			if [ "$driver_loaded" -ne 0 ]; then
				linux_bind_driver "$bdf" ioatdma
			else
				linux_unbind_driver "$bdf"
			fi
		done
	done

	# virtio
	# Collect all the device_id info of virtio devices.
	dev_ids=$(grep "PCI_DEVICE_ID_VIRTIO" "$pci_ids_header" \
		| awk -F"x" '{print $2}')

	# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
	# Requires some more investigation - for example, some kernels do not seem to have
	# virtio-pci but just virtio_scsi instead. Also need to make sure we get the
	# underscore vs. dash right in the virtio_scsi name.
	modprobe virtio-pci || true
	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 1af4 "$dev_id"); do
			if ! pci_can_bind "$bdf"; then
				echo "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			linux_bind_driver "$bdf" virtio-pci
		done
	done

	echo "1" > "/sys/bus/pci/rescan"
}
|
|
|
|
|
|
|
|
# Linux "reset" mode: rebind the PCI devices to their kernel drivers, then
# delete the spdk*map_* files from every hugetlbfs mount and the /run/.spdk*
# files. Hugepage reservations themselves are not touched here.
reset_linux() {
	reset_linux_pci

	for mount in $(linux_hugetlbfs_mounts); do
		rm -f "${mount}"/spdk*map_*
	done

	rm -f /run/.spdk*
}
|
|
|
|
|
2017-01-30 13:38:44 +00:00
|
|
|
# Print a status report: free/total hugepages per NUMA node and hugepage size
# (or the system-wide numbers from /proc/meminfo when no per-node information
# exists), followed by the NUMA node and bound driver of every NVMe, I/OAT
# and virtio PCI device, plus the block device names where available.
#
# Globals:
#   rootdir (read), HUGEPGSZ (read, only for the system-wide fallback)
function status_linux {
	local pci_ids_header="$rootdir/include/spdk/pci_ids.h"
	local path node huge_size free_pages all_pages
	local numa_nodes=0
	local bdf driver name dev_id dev_ids blknames

	echo "Hugepages"
	printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"

	# node[0-9]* also covers nodes numbered 10 and above, which "node?"
	# would miss. An unmatched glob stays literal and is rejected by the
	# -d test, so nullglob does not have to be toggled.
	for path in /sys/devices/system/node/node[0-9]*/hugepages/hugepages-*/; do
		[[ -d $path ]] || continue
		[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]] || continue
		node=${BASH_REMATCH[1]}
		huge_size=${BASH_REMATCH[2]}

		numa_nodes=$((numa_nodes + 1))
		free_pages=$(cat "$path/free_hugepages")
		all_pages=$(cat "$path/nr_hugepages")

		# Quoted so that an empty value cannot shift the columns.
		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	done

	# fall back to system-wide hugepages
	if [ "$numa_nodes" = "0" ]; then
		free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }')
		all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }')
		node="-"
		huge_size="$HUGEPGSZ"

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	fi

	echo "NVMe devices"

	echo -e "BDF\t\tNuma Node\tDriver name\t\tDevice name"
	for bdf in $(iter_pci_class_code 01 08 02); do
		driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
		node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
		if [[ "$driver" == "nvme" && -d "/sys/bus/pci/devices/$bdf/nvme" ]]; then
			# The entries of the nvme/ directory are the controller names.
			name="\t"$(ls "/sys/bus/pci/devices/$bdf/nvme")
		else
			name="-"
		fi
		echo -e "$bdf\t$node\t\t$driver\t\t$name"
	done

	echo "I/OAT DMA"

	# Collect all the device_id info of ioat devices.
	dev_ids=$(grep "PCI_DEVICE_ID_INTEL_IOAT" "$pci_ids_header" \
		| awk -F"x" '{print $2}')
	echo -e "BDF\t\tNuma Node\tDriver Name"
	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
			echo -e "$bdf\t$node\t\t$driver"
		done
	done

	echo "virtio"

	# Collect all the device_id info of virtio devices.
	dev_ids=$(grep "PCI_DEVICE_ID_VIRTIO" "$pci_ids_header" \
		| awk -F"x" '{print $2}')
	echo -e "BDF\t\tNuma Node\tDriver Name\t\tDevice Name"
	for dev_id in $dev_ids; do
		for bdf in $(iter_pci_dev_id 1af4 "$dev_id"); do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
			blknames=''
			get_virtio_names_from_bdf "$bdf" blknames
			echo -e "$bdf\t$node\t\t$driver\t\t$blknames"
		done
	done
}
|
|
|
|
|
2017-11-14 20:05:47 +00:00
|
|
|
# FreeBSD: hand every NVMe controller (PCI class 0x010802) and every Intel
# I/OAT device listed in pci_ids.h to the nic_uio driver.
#
# The bus:slot:function triples of the matching "pciconf -l" lines are joined
# with commas, stored in the hw.nic_uio.bdfs kernel environment variable, and
# nic_uio.ko is (re)loaded.
#
# Globals:
#   rootdir (read)
#   GREP_STR, BDFS (written) - kept global as before
function configure_freebsd_pci {
	local pci_ids_header="$rootdir/include/spdk/pci_ids.h"
	local dev_id dev_ids

	# NVMe
	GREP_STR="class=0x010802"

	# IOAT
	# The IDs are kept in a variable rather than a temp file, so nothing is
	# left behind if a later command fails under "set -e".
	dev_ids=$(grep "PCI_DEVICE_ID_INTEL_IOAT" "$pci_ids_header" \
		| awk -F"x" '{print $2}')
	for dev_id in $dev_ids; do
		GREP_STR="${GREP_STR}\|chip=0x${dev_id}8086"
	done

	# With ":" as separator, fields 2-4 of a pciconf line are bus, slot and
	# function; emit them as one comma separated list.
	BDFS=$(pciconf -l | grep "${GREP_STR}" \
		| awk -F: '{if (count > 0) printf ","; printf "%s:%s:%s",$2,$3,$4; count++}')

	# The module may simply not be loaded yet.
	kldunload nic_uio.ko || true
	kenv hw.nic_uio.bdfs="$BDFS"
	kldload nic_uio.ko
}
|
|
|
|
|
|
|
|
# FreeBSD "config" mode: set up nic_uio for the PCI devices, then make sure
# contigmem is loaded with HUGEMEM / 256 buffers of 256 MiB each.
function configure_freebsd {
	local wanted_buffers

	configure_freebsd_pci

	wanted_buffers=$((HUGEMEM / 256))

	# If contigmem is already loaded but the HUGEMEM specified doesn't match the
	# previous value, unload contigmem so that we can reload with the new value.
	if kldstat -q -m contigmem && [ $(kenv hw.contigmem.num_buffers) -ne "$wanted_buffers" ]; then
		kldunload contigmem.ko
	fi

	# Still loaded means the current configuration already matches.
	if kldstat -q -m contigmem; then
		return 0
	fi

	kenv hw.contigmem.num_buffers=$wanted_buffers
	kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
	kldload contigmem.ko
}
|
|
|
|
|
|
|
|
# FreeBSD "reset" mode: unload the contigmem and nic_uio kernel modules, in
# that order. A module that cannot be unloaded is ignored.
function reset_freebsd {
	local module

	for module in contigmem.ko nic_uio.ko; do
		kldunload "$module" || true
	done
}
|
|
|
|
|
2018-01-15 19:08:37 +00:00
|
|
|
# ---------------------------------------------------------------------------
# Entry point: apply defaults to the environment knobs, then dispatch on the
# requested mode (first argument, "config" when omitted).
# ---------------------------------------------------------------------------
mode=${1:-config}

: "${HUGEMEM:=2048}"
: "${PCI_WHITELIST:=""}"
: "${PCI_BLACKLIST:=""}"

# NVME_WHITELIST entries are appended to PCI_WHITELIST.
if [ -n "$NVME_WHITELIST" ]; then
	PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

# "none" is not a valid PCI address, so no device can match the whitelist.
if [ -n "$SKIP_PCI" ]; then
	PCI_WHITELIST="none"
fi

# Default owner: the user who invoked sudo, else the login name (which may
# legitimately be unavailable, hence the "|| true").
if [ -z "$TARGET_USER" ]; then
	TARGET_USER="$SUDO_USER"
	if [ -z "$TARGET_USER" ]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

if [ "$(uname)" = Linux ]; then
	# Default hugepage size in kB; 0 when /proc/meminfo does not report one.
	HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))

	# Round HUGEMEM (MB) up to whole hugepages. The division is done in kB
	# so a hugepage size below 1 MB cannot cause a division by zero, and
	# it is skipped entirely when the size is unknown so that modes which
	# do not need NRHUGE (help, status, ...) keep working. For sizes that
	# are a multiple of 1 MB the result equals the MB-based calculation.
	if ((HUGEPGSZ > 0)); then
		: "${NRHUGE=$(((HUGEMEM * 1024 + HUGEPGSZ - 1) / HUGEPGSZ))}"
	fi

	case "$mode" in
		config)
			if [ -z "$NRHUGE" ]; then
				echo "## ERROR: NRHUGE is not set and the default hugepage size could not be determined." >&2
				exit 1
			fi
			configure_linux
			;;
		cleanup)
			cleanup_linux
			;;
		reset)
			reset_linux
			;;
		status)
			status_linux
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
else
	case "$mode" in
		config)
			configure_freebsd
			;;
		reset)
			reset_freebsd
			;;
		cleanup)
			echo "setup.sh cleanup function not yet supported on $(uname)"
			;;
		status)
			echo "setup.sh status function not yet supported on $(uname)"
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
fi
|