numam-spdk/scripts/setup.sh
Dayu Liu 5cdab5e2e6 doc: fix some typos and align indent when setup.sh --help
Signed-off-by: Dayu Liu <liu.dayu@zte.com.cn>
Change-Id: Id0a2a3a577adfd5026b662d97d74c9ec6aebeb8e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/2396
Community-CI: Mellanox Build Bot
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2020-05-18 10:14:56 +00:00

885 lines
24 KiB
Bash
Executable File

#!/usr/bin/env bash
set -e
os=$(uname -s)
if [[ $os != Linux && $os != FreeBSD ]]; then
echo "Not supported platform ($os), aborting"
exit 1
fi
rootdir=$(readlink -f $(dirname $0))/..
source "$rootdir/scripts/common.sh"
function usage() {
if [[ $os == Linux ]]; then
options="[config|reset|status|cleanup|help]"
else
options="[config|reset|help]"
fi
[[ -n $2 ]] && (
echo "$2"
echo ""
)
echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
echo "Usage: $(basename $1) $options"
echo
echo "$options - as following:"
echo "config Default mode. Allocate hugepages and bind PCI devices."
if [[ $os == Linux ]]; then
echo "cleanup Remove any orphaned files that can be left in the system after SPDK application exit"
fi
echo "reset Rebind PCI devices back to their original drivers."
echo " Also cleanup any leftover spdk files/resources."
echo " Hugepage memory size will remain unchanged."
if [[ $os == Linux ]]; then
echo "status Print status of all SPDK-compatible devices on the system."
fi
echo "help Print this help message."
echo
echo "The following environment variables can be specified."
echo "HUGEMEM Size of hugepage memory to allocate (in MB). 2048 by default."
echo " For NUMA systems, the hugepages will be evenly distributed"
echo " between CPU nodes"
echo "NRHUGE Number of hugepages to allocate. This variable overwrites HUGEMEM."
echo "HUGENODE Specific NUMA node to allocate hugepages on. To allocate"
echo " hugepages on multiple nodes run this script multiple times -"
echo " once for each node."
echo "PCI_WHITELIST"
echo "PCI_BLACKLIST Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
echo " Each device must be specified as a full PCI address."
echo " E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
echo " To blacklist all PCI devices use a non-valid address."
echo " E.g. PCI_WHITELIST=\"none\""
echo " If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
echo " will be bound."
echo " Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
echo " PCI_BLACKLIST has precedence over PCI_WHITELIST."
echo "TARGET_USER User that will own hugepage mountpoint directory and vfio groups."
echo " By default the current user will be used."
echo "DRIVER_OVERRIDE Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
echo " bind devices to the given driver."
echo " E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
exit 0
}
# In monolithic kernels the lsmod won't work. So
# back that with a /sys/modules. We also check
# /sys/bus/pci/drivers/ as neither lsmod nor /sys/modules might
# contain needed info (like in Fedora-like OS).
function check_for_driver() {
if lsmod | grep -q ${1//-/_}; then
return 1
fi
if [[ -d /sys/module/${1} || -d \
/sys/module/${1//-/_} || -d \
/sys/bus/pci/drivers/${1} || -d \
/sys/bus/pci/drivers/${1//-/_} ]]; then
return 2
fi
return 0
}
function pci_dev_echo() {
local bdf="$1"
local vendor
local device
vendor="$(cat /sys/bus/pci/devices/$bdf/vendor)"
device="$(cat /sys/bus/pci/devices/$bdf/device)"
shift
echo "$bdf (${vendor#0x} ${device#0x}): $*"
}
function linux_bind_driver() {
bdf="$1"
driver_name="$2"
old_driver_name="no driver"
ven_dev_id=$(lspci -n -s $bdf | cut -d' ' -f3 | sed 's/:/ /')
if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
old_driver_name=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver))
if [ "$driver_name" = "$old_driver_name" ]; then
pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
return 0
fi
echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
fi
pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"
echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true
iommu_group=$(basename $(readlink -f /sys/bus/pci/devices/$bdf/iommu_group))
if [ -e "/dev/vfio/$iommu_group" ]; then
if [ -n "$TARGET_USER" ]; then
chown "$TARGET_USER" "/dev/vfio/$iommu_group"
fi
fi
}
function linux_unbind_driver() {
local bdf="$1"
local ven_dev_id
ven_dev_id=$(lspci -n -s $bdf | cut -d' ' -f3 | sed 's/:/ /')
local old_driver_name="no driver"
if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
old_driver_name=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver))
echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
fi
pci_dev_echo "$bdf" "$old_driver_name -> no driver"
}
function linux_hugetlbfs_mounts() {
mount | grep ' type hugetlbfs ' | awk '{ print $3 }'
}
function get_nvme_name_from_bdf() {
local blknames=()
set +e
nvme_devs=$(lsblk -d --output NAME | grep "^nvme")
set -e
for dev in $nvme_devs; do
link_name=$(readlink /sys/block/$dev/device/device) || true
if [ -z "$link_name" ]; then
link_name=$(readlink /sys/block/$dev/device)
fi
link_bdf=$(basename "$link_name")
if [ "$link_bdf" = "$1" ]; then
blknames+=($dev)
fi
done
printf '%s\n' "${blknames[@]}"
}
function get_virtio_names_from_bdf() {
blk_devs=$(lsblk --nodeps --output NAME)
virtio_names=()
for dev in $blk_devs; do
if readlink "/sys/block/$dev" | grep -q "$1"; then
virtio_names+=("$dev")
fi
done
eval "$2=( " "${virtio_names[@]}" " )"
}
function configure_linux_pci() {
local driver_path=""
driver_name=""
if [[ -n "${DRIVER_OVERRIDE}" ]]; then
driver_path="$DRIVER_OVERRIDE"
driver_name="${DRIVER_OVERRIDE##*/}"
# modprobe and the sysfs don't use the .ko suffix.
driver_name=${driver_name%.ko}
# path = name -> there is no path
if [[ "$driver_path" = "$driver_name" ]]; then
driver_path=""
fi
# igb_uio is a common driver to override with and it depends on uio.
if [[ "$driver_name" = "igb_uio" ]]; then
modprobe uio
fi
elif [[ -n "$(ls /sys/kernel/iommu_groups)" || (-e \
/sys/module/vfio/parameters/enable_unsafe_noiommu_mode && \
"$(cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode)" == "Y") ]]; then
driver_name=vfio-pci
elif modinfo uio_pci_generic > /dev/null 2>&1; then
driver_name=uio_pci_generic
elif [[ -r "$rootdir/dpdk/build/kmod/igb_uio.ko" ]]; then
driver_path="$rootdir/dpdk/build/kmod/igb_uio.ko"
driver_name="igb_uio"
modprobe uio
echo "WARNING: uio_pci_generic not detected - using $driver_name"
else
echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic"
echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling."
return 1
fi
# modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
if [[ -n "$driver_path" ]]; then
insmod $driver_path || true
else
modprobe $driver_name
fi
# NVMe
for bdf in ${pci_bus_cache["0x010802"]}; do
blknames=()
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
continue
fi
mount=false
for blkname in $(get_nvme_name_from_bdf $bdf); do
mountpoints=$(lsblk /dev/$blkname --output MOUNTPOINT -n | wc -w)
if [ "$mountpoints" != "0" ]; then
mount=true
blknames+=($blkname)
fi
done
if ! $mount; then
linux_bind_driver "$bdf" "$driver_name"
else
for name in "${blknames[@]}"; do
pci_dev_echo "$bdf" "Active mountpoints on /dev/$name, so not binding PCI dev"
done
fi
done
# IOAT
TMP=$(mktemp)
#collect all the device_id info of ioat devices.
grep "PCI_DEVICE_ID_INTEL_IOAT" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
continue
fi
linux_bind_driver "$bdf" "$driver_name"
done
done < $TMP
rm $TMP
# IDXD
TMP=$(mktemp)
#collect all the device_id info of idxd devices.
grep "PCI_DEVICE_ID_INTEL_IDXD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
continue
fi
linux_bind_driver "$bdf" "$driver_name"
done
done < $TMP
rm $TMP
# virtio
TMP=$(mktemp)
#collect all the device_id info of virtio devices.
grep "PCI_DEVICE_ID_VIRTIO" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
continue
fi
blknames=()
get_virtio_names_from_bdf "$bdf" blknames
for blkname in "${blknames[@]}"; do
if [ "$(lsblk /dev/$blkname --output MOUNTPOINT -n | wc -w)" != "0" ]; then
pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding"
continue 2
fi
done
linux_bind_driver "$bdf" "$driver_name"
done
done < $TMP
rm $TMP
# VMD
TMP=$(mktemp)
#collect all the device_id info of vmd devices.
grep "PCI_DEVICE_ID_INTEL_VMD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use $bdf; then
echo "Skipping un-whitelisted VMD device at $bdf"
continue
fi
linux_bind_driver "$bdf" "$driver_name"
echo " VMD generic kdrv: " "$bdf" "$driver_name"
done
done < $TMP
rm $TMP
echo "1" > "/sys/bus/pci/rescan"
}
function cleanup_linux() {
shopt -s extglob nullglob
dirs_to_clean=""
dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
fi
files_to_clean=""
for dir in $dirs_to_clean; do
files_to_clean+="$(echo $dir/*) "
done
shopt -u extglob nullglob
files_to_clean+="$(ls -1 /dev/shm/* \
| grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
if [[ -z "$files_to_clean" ]]; then
echo "Clean"
return 0
fi
shopt -s extglob
for fd_dir in $(echo /proc/+([0-9])); do
opened_files+="$(readlink -e assert_not_empty $fd_dir/fd/* || true)"
done
shopt -u extglob
if [[ -z "$opened_files" ]]; then
echo "Can't get list of opened files!"
exit 1
fi
echo 'Cleaning'
for f in $files_to_clean; do
if ! echo "$opened_files" | grep -E -q "^$f\$"; then
echo "Removing: $f"
rm $f
else
echo "Still open: $f"
fi
done
for dir in $dirs_to_clean; do
if ! echo "$opened_files" | grep -E -q "^$dir\$"; then
echo "Removing: $dir"
rmdir $dir
else
echo "Still open: $dir"
fi
done
echo "Clean"
unset dirs_to_clean files_to_clean opened_files
}
function configure_linux() {
configure_linux_pci
hugetlbfs_mounts=$(linux_hugetlbfs_mounts)
if [ -z "$hugetlbfs_mounts" ]; then
hugetlbfs_mounts=/mnt/huge
echo "Mounting hugetlbfs at $hugetlbfs_mounts"
mkdir -p "$hugetlbfs_mounts"
mount -t hugetlbfs nodev "$hugetlbfs_mounts"
fi
if [ -z "$HUGENODE" ]; then
hugepages_target="/proc/sys/vm/nr_hugepages"
else
hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
fi
echo "$NRHUGE" > "$hugepages_target"
allocated_hugepages=$(cat $hugepages_target)
if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
echo ""
echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
exit 1
fi
if [ "$driver_name" = "vfio-pci" ]; then
if [ -n "$TARGET_USER" ]; then
for mount in $hugetlbfs_mounts; do
chown "$TARGET_USER" "$mount"
chmod g+w "$mount"
done
MEMLOCK_AMNT=$(su "$TARGET_USER" -c "ulimit -l")
if [[ $MEMLOCK_AMNT != "unlimited" ]]; then
MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
cat <<- MEMLOCK
"$TARGET_USER" user memlock limit: $MEMLOCK_MB MB
This is the maximum amount of memory you will be
able to use with DPDK and VFIO if run as user "$TARGET_USER".
To change this, please adjust limits.conf memlock limit for user "$TARGET_USER".
MEMLOCK
if ((MEMLOCK_AMNT < 65536)); then
echo ""
echo "## WARNING: memlock limit is less than 64MB"
echo -n "## DPDK with VFIO may not be able to initialize "
echo "if run as user \"$TARGET_USER\"."
fi
fi
fi
fi
if [ ! -f /dev/cpu/0/msr ]; then
# Some distros build msr as a module. Make sure it's loaded to ensure
# DPDK can easily figure out the TSC rate rather than relying on 100ms
# sleeps.
modprobe msr || true
fi
}
function reset_linux_pci() {
# NVMe
set +e
check_for_driver nvme
driver_loaded=$?
set -e
for bdf in ${pci_bus_cache["0x010802"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller $blkname"
continue
fi
if [ $driver_loaded -ne 0 ]; then
linux_bind_driver "$bdf" nvme
else
linux_unbind_driver "$bdf"
fi
done
# IOAT
TMP=$(mktemp)
#collect all the device_id info of ioat devices.
grep "PCI_DEVICE_ID_INTEL_IOAT" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
set +e
check_for_driver ioatdma
driver_loaded=$?
set -e
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
continue
fi
if [ $driver_loaded -ne 0 ]; then
linux_bind_driver "$bdf" ioatdma
else
linux_unbind_driver "$bdf"
fi
done
done < $TMP
rm $TMP
# IDXD
TMP=$(mktemp)
#collect all the device_id info of idxd devices.
grep "PCI_DEVICE_ID_INTEL_IDXD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
set +e
check_for_driver idxd
driver_loaded=$?
set -e
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
continue
fi
if [ $driver_loaded -ne 0 ]; then
linux_bind_driver "$bdf" idxd
else
linux_unbind_driver "$bdf"
fi
done
done < $TMP
rm $TMP
# virtio
TMP=$(mktemp)
#collect all the device_id info of virtio devices.
grep "PCI_DEVICE_ID_VIRTIO" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
# Requires some more investigation - for example, some kernels do not seem to have
# virtio-pci but just virtio_scsi instead. Also need to make sure we get the
# underscore vs. dash right in the virtio_scsi name.
modprobe virtio-pci || true
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at"
continue
fi
linux_bind_driver "$bdf" virtio-pci
done
done < $TMP
rm $TMP
# VMD
TMP=$(mktemp)
#collect all the device_id info of vmd devices.
grep "PCI_DEVICE_ID_INTEL_VMD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}' > $TMP
set +e
check_for_driver vmd
driver_loaded=$?
set -e
while IFS= read -r dev_id; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
if ! pci_can_use $bdf; then
echo "Skipping un-whitelisted VMD device at $bdf"
continue
fi
if [ $driver_loaded -ne 0 ]; then
linux_bind_driver "$bdf" vmd
else
linux_unbind_driver "$bdf"
fi
done
done < $TMP
rm $TMP
echo "1" > "/sys/bus/pci/rescan"
}
function reset_linux() {
reset_linux_pci
for mount in $(linux_hugetlbfs_mounts); do
rm -f "$mount"/spdk*map_*
done
rm -f /run/.spdk*
}
function status_linux() {
echo "Hugepages"
printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"
numa_nodes=0
shopt -s nullglob
for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
numa_nodes=$((numa_nodes + 1))
free_pages=$(cat $path/free_hugepages)
all_pages=$(cat $path/nr_hugepages)
[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]
node=${BASH_REMATCH[1]}
huge_size=${BASH_REMATCH[2]}
printf "%-6s %10s %8s / %6s\n" $node $huge_size $free_pages $all_pages
done
shopt -u nullglob
# fall back to system-wide hugepages
if [ "$numa_nodes" = "0" ]; then
free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }')
all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }')
node="-"
huge_size="$HUGEPGSZ"
printf "%-6s %10s %8s / %6s\n" $node $huge_size $free_pages $all_pages
fi
echo ""
echo "NVMe devices"
echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
for bdf in ${pci_bus_cache["0x010802"]}; do
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
if [ "$numa_nodes" = "0" ]; then
node="-"
else
node=$(cat /sys/bus/pci/devices/$bdf/numa_node)
if ((node == -1)); then
node=unknown
fi
fi
device=$(cat /sys/bus/pci/devices/$bdf/device)
vendor=$(cat /sys/bus/pci/devices/$bdf/vendor)
if [ "$driver" = "nvme" ] && [ -d /sys/bus/pci/devices/$bdf/nvme ]; then
name="\t"$(ls /sys/bus/pci/devices/$bdf/nvme)
else
name="-"
fi
echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}\t\t$name"
done
echo ""
echo "I/OAT DMA"
#collect all the device_id info of ioat devices.
TMP=$(grep "PCI_DEVICE_ID_INTEL_IOAT" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}')
echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
for dev_id in $TMP; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
if [ "$numa_nodes" = "0" ]; then
node="-"
else
node=$(cat /sys/bus/pci/devices/$bdf/numa_node)
if ((node == -1)); then
node=unknown
fi
fi
device=$(cat /sys/bus/pci/devices/$bdf/device)
vendor=$(cat /sys/bus/pci/devices/$bdf/vendor)
echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
done
done
echo ""
echo "IDXD DMA"
#collect all the device_id info of idxd devices.
TMP=$(grep "PCI_DEVICE_ID_INTEL_IDXD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}')
echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
for dev_id in $TMP; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
if [ "$numa_nodes" = "0" ]; then
node="-"
else
node=$(cat /sys/bus/pci/devices/$bdf/numa_node)
fi
device=$(cat /sys/bus/pci/devices/$bdf/device)
vendor=$(cat /sys/bus/pci/devices/$bdf/vendor)
echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
done
done
echo ""
echo "virtio"
#collect all the device_id info of virtio devices.
TMP=$(grep "PCI_DEVICE_ID_VIRTIO" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}')
echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
for dev_id in $TMP; do
for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
if [ "$numa_nodes" = "0" ]; then
node="-"
else
node=$(cat /sys/bus/pci/devices/$bdf/numa_node)
if ((node == -1)); then
node=unknown
fi
fi
device=$(cat /sys/bus/pci/devices/$bdf/device)
vendor=$(cat /sys/bus/pci/devices/$bdf/vendor)
blknames=()
get_virtio_names_from_bdf "$bdf" blknames
echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}"
done
done
echo "VMD"
#collect all the device_id info of vmd devices.
TMP=$(grep "PCI_DEVICE_ID_INTEL_VMD" $rootdir/include/spdk/pci_ids.h \
| awk -F"x" '{print $2}')
echo -e "BDF\t\tNuma Node\tDriver Name"
for dev_id in $TMP; do
for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}')
node=$(cat /sys/bus/pci/devices/$bdf/numa_node)
if ((node == -1)); then
node=unknown
fi
echo -e "$bdf\t$node\t\t$driver"
done
done
}
function status_freebsd() {
local id pci
local ioat idxd vmd
status_print() (
local dev driver
echo -e "BDF\t\tVendor\tDevice\tDriver"
for id; do
for pci in ${pci_bus_cache["$id"]}; do
driver=$(pciconf -l "pci$pci")
driver=${driver%@*}
printf '%s\t%s\t%s\t%s\n' \
"$pci" \
"${pci_ids_vendor["$pci"]}" \
"${pci_ids_device["$pci"]}" \
"$driver"
done
done
)
devs=PCI_DEVICE_ID_INTEL_IOAT
devs+="|PCI_DEVICE_ID_INTEL_IDXD"
devs+="|PCI_DEVICE_ID_INTEL_VMD"
local dev_type dev_id
while read -r _ dev_type dev_id; do
case "$dev_type" in
*IOAT*) ioat+=("0x8086:$dev_id") ;;
*IDXD*) idxd+=("0x8086:$dev_id") ;;
*VMD*) vmd+=("0x8086:$dev_id") ;;
esac
done < <(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h")
local contigmem=present
if ! kldstat -q -m contigmem; then
contigmem="not present"
fi
cat <<- BSD_INFO
Contigmem ($contigmem)
Buffer Size: $(kenv hw.contigmem.buffer_size)
Num Buffers: $(kenv hw.contigmem.num_buffers)
NVMe devices
$(status_print 0x010802)
I/IOAT DMA
$(status_print "${ioat[@]}")
IDXD DMA
$(status_print "${idxd[@]}")
VMD
$(status_print "${vmd[@]}")
BSD_INFO
}
function configure_freebsd_pci() {
local devs ids id
local BDFS
devs=PCI_DEVICE_ID_INTEL_IOAT
devs+="|PCI_DEVICE_ID_INTEL_IDXD"
devs+="|PCI_DEVICE_ID_INTEL_VMD"
ids=($(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h" | awk '{print $3}'))
if [[ -n ${pci_bus_cache["0x010802"]} ]]; then
BDFS+=(${pci_bus_cache["0x010802"]})
fi
for id in "${ids[@]}"; do
[[ -n ${pci_bus_cache["0x8086:$id"]} ]] || continue
BDFS+=(${pci_bus_cache["0x8086:$id"]})
done
# Drop the domain part from all the addresses
BDFS=("${BDFS[@]#*:}")
local IFS=","
kldunload nic_uio.ko || true
kenv hw.nic_uio.bdfs="${BDFS[*]}"
kldload nic_uio.ko
}
function configure_freebsd() {
configure_freebsd_pci
# If contigmem is already loaded but the HUGEMEM specified doesn't match the
# previous value, unload contigmem so that we can reload with the new value.
if kldstat -q -m contigmem; then
if [ $(kenv hw.contigmem.num_buffers) -ne "$((HUGEMEM / 256))" ]; then
kldunload contigmem.ko
fi
fi
if ! kldstat -q -m contigmem; then
kenv hw.contigmem.num_buffers=$((HUGEMEM / 256))
kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
kldload contigmem.ko
fi
}
function reset_freebsd() {
kldunload contigmem.ko || true
kldunload nic_uio.ko || true
}
CMD=reset cache_pci_bus
mode=$1
if [ -z "$mode" ]; then
mode="config"
fi
: ${HUGEMEM:=2048}
: ${PCI_WHITELIST:=""}
: ${PCI_BLACKLIST:=""}
if [ -n "$NVME_WHITELIST" ]; then
PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi
if [ -n "$SKIP_PCI" ]; then
PCI_WHITELIST="none"
fi
if [ -z "$TARGET_USER" ]; then
TARGET_USER="$SUDO_USER"
if [ -z "$TARGET_USER" ]; then
TARGET_USER=$(logname 2> /dev/null) || true
fi
fi
if [[ $os == Linux ]]; then
HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
: ${NRHUGE=$(((HUGEMEM + HUGEPGSZ_MB - 1) / HUGEPGSZ_MB))}
if [ "$mode" == "config" ]; then
configure_linux
elif [ "$mode" == "cleanup" ]; then
cleanup_linux
elif [ "$mode" == "reset" ]; then
reset_linux
elif [ "$mode" == "status" ]; then
status_linux
elif [ "$mode" == "help" ]; then
usage $0
else
usage $0 "Invalid argument '$mode'"
fi
else
if [ "$mode" == "config" ]; then
configure_freebsd
elif [ "$mode" == "reset" ]; then
reset_freebsd
elif [ "$mode" == "cleanup" ]; then
echo "setup.sh cleanup function not yet supported on $os"
elif [ "$mode" == "status" ]; then
status_freebsd
elif [ "$mode" == "help" ]; then
usage $0
else
usage $0 "Invalid argument '$mode'"
fi
fi