Reset PCI pass through devices via PCI-e FLR during VM start and end.

Add routines to trigger a function level reset (FLR) of a PCI-express
device via the PCI-express device control register.  This also includes
support routines to wait for pending transactions to complete as well
as calculating the maximum completion timeout permitted by a device.

Change the ppt(4) driver to reset pass through devices before attaching
to a VM during startup and before detaching from a VM during shutdown.

Reviewed by:	imp, wblock (earlier version)
MFC after:	1 month
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D7751
This commit is contained in:
John Baldwin 2016-09-06 21:15:35 +00:00
parent b90d9a3752
commit da0fc9250c
6 changed files with 296 additions and 4 deletions

View File

@ -1354,7 +1354,10 @@ MLINKS+=pci.9 pci_alloc_msi.9 \
pci.9 pci_set_max_read_req.9 \
pci.9 pci_write_config.9 \
pci.9 pcie_adjust_config.9 \
pci.9 pcie_flr.9 \
pci.9 pcie_max_completion_timeout.9 \
pci.9 pcie_read_config.9 \
pci.9 pcie_wait_for_pending_transactions.9 \
pci.9 pcie_write_config.9
MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \
pci_iov_schema.9 pci_iov_schema_add_bool.9 \

View File

@ -66,7 +66,10 @@
.Nm pci_set_powerstate ,
.Nm pci_write_config ,
.Nm pcie_adjust_config ,
.Nm pcie_flr ,
.Nm pcie_get_max_completion_timeout ,
.Nm pcie_read_config ,
.Nm pcie_wait_for_pending_transactions ,
.Nm pcie_write_config
.Nd PCI bus interface
.Sh SYNOPSIS
@ -145,8 +148,14 @@
.Fa "uint32_t val"
.Fa "int width"
.Fc
.Ft bool
.Fn pcie_flr "device_t dev" "u_int max_delay" "bool force"
.Ft int
.Fn pcie_get_max_completion_timeout "device_t dev"
.Ft uint32_t
.Fn pcie_read_config "device_t dev" "int reg" "int width"
.Ft bool
.Fn pcie_wait_for_pending_transactions "device_t dev" "u_int max_delay"
.Ft void
.Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width"
.Ft void
@ -431,6 +440,51 @@ keyword,
then
.Fn pci_get_vpd_readonly
returns an error.
.Pp
The
.Fn pcie_get_max_completion_timeout
function returns the maximum completion timeout configured for the device
.Fa dev
in microseconds.
If the
.Fa dev
device is not a PCI-express device,
.Fn pcie_get_max_completion_timeout
returns zero.
When completion timeouts are disabled for
.Fa dev ,
this function returns the maximum timeout that would be used if timeouts
were enabled.
.Pp
The
.Fn pcie_wait_for_pending_transactions
function waits for any pending transactions initiated by the
.Fa dev
device to complete.
The function checks for pending transactions by polling the transactions
pending flag in the PCI-express device status register.
It returns
.Dv true
once the transactions pending flag is clear.
If transactions are still pending after
.Fa max_delay
milliseconds,
.Fn pcie_wait_for_pending_transactions
returns
.Dv false .
If
.Fa max_delay
is set to zero,
.Fn pcie_wait_for_pending_transactions
performs a single check;
otherwise,
this function may sleep while polling the transactions pending flag.
.Fn pcie_wait_for_pending_transactions
returns
.Dv true
if
.Fa dev
is not a PCI-express device.
.Ss Device Configuration
The
.Fn pci_enable_busmaster
@ -662,6 +716,51 @@ is invoked,
then the device will be transitioned to
.Dv PCI_POWERSTATE_D0
before any config registers are restored.
.Pp
The
.Fn pcie_flr
function requests a Function Level Reset
.Pq FLR
of
.Fa dev .
If
.Fa dev
is not a PCI-express device or does not support Function Level Resets via
the PCI-express device control register,
.Dv false
is returned.
Pending transactions are drained by disabling busmastering and calling
.Fn pcie_wait_for_pending_transactions
before resetting the device.
The
.Fa max_delay
argument specifies the maximum timeout to wait for pending transactions as
described for
.Fn pcie_wait_for_pending_transactions .
If
.Fn pcie_wait_for_pending_transactions
fails with a timeout and
.Fa force
is
.Dv false ,
busmastering is re-enabled and
.Dv false
is returned.
If
.Fn pcie_wait_for_pending_transactions
fails with a timeout and
.Fa force
is
.Dv true ,
the device is reset despite the timeout.
After the reset has been requested,
.Fn pcie_flr
sleeps for at least 100 milliseconds before returning
.Dv true .
Note that
.Fn pcie_flr
does not save and restore any state around the reset.
The caller should save and restore state as needed.
.Ss Message Signaled Interrupts
Message Signaled Interrupts
.Pq MSI

View File

@ -362,6 +362,11 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func)
if (ppt->vm != NULL && ppt->vm != vm)
return (EBUSY);
pci_save_state(ppt->dev);
pcie_flr(ppt->dev,
max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
true);
pci_restore_state(ppt->dev);
ppt->vm = vm;
iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev));
iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
@ -382,6 +387,12 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
*/
if (ppt->vm != vm)
return (EBUSY);
pci_save_state(ppt->dev);
pcie_flr(ppt->dev,
max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
true);
pci_restore_state(ppt->dev);
ppt_unmap_mmio(vm, ppt);
ppt_teardown_msi(ppt);
ppt_teardown_msix(ppt);

View File

@ -5892,3 +5892,165 @@ pci_find_pcie_root_port(device_t dev)
dev = pcib;
}
}
/*
 * Poll a PCI-express function until it reports no pending transactions.
 *
 * max_delay is the longest time to wait, in milliseconds.  The
 * Transactions Pending bit of the device status register is sampled
 * immediately and then again after each sleep; sleeps last at most
 * 100 milliseconds each and never exceed the time remaining.  A
 * max_delay of zero therefore performs a single check without
 * sleeping.
 *
 * Returns true as soon as the bit reads clear, and false if it is still
 * set once the delay budget is exhausted.  A device without a
 * PCI-express capability is reported as idle (true).
 */
bool
pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
{
	struct pci_devinfo *dinfo;
	u_int remaining, slice;
	int pcie_off;

	dinfo = device_get_ivars(dev);
	pcie_off = dinfo->cfg.pcie.pcie_location;
	if (pcie_off == 0)
		return (true);

	remaining = max_delay;
	for (;;) {
		if ((pci_read_config(dev, pcie_off + PCIER_DEVICE_STA, 2) &
		    PCIEM_STA_TRANSACTION_PND) == 0)
			return (true);
		if (remaining == 0)
			return (false);

		/* Sleep for the lesser of 100ms and whatever time is left. */
		slice = remaining > 100 ? 100 : remaining;
		pause_sbt("pcietp", slice * SBT_1MS, 0, C_HARDCLOCK);
		remaining -= slice;
	}
}
/*
 * Report the upper bound of a function's Completion Timeout, in
 * microseconds.
 *
 * Returns 0 when dev has no PCI-express capability.  Otherwise the
 * result is derived from the Completion Timeout Value field of the
 * device control 2 register, falling back to the 50 millisecond
 * default whenever that field is not applicable or not recognized.
 */
int
pcie_get_max_completion_timeout(device_t dev)
{
	struct pci_devinfo *dinfo;
	uint32_t devcap2;
	int pcie_off, timo_us;

	dinfo = device_get_ivars(dev);
	pcie_off = dinfo->cfg.pcie.pcie_location;
	if (pcie_off == 0)
		return (0);

	/*
	 * Capability versions before 2 are limited to the default range
	 * of 50 microseconds to 50 milliseconds; device capabilities 2
	 * is not consulted for them.
	 */
	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2)
		return (50 * 1000);

	/* The default range also applies if no timeout ranges are advertised. */
	devcap2 = pci_read_config(dev, pcie_off + PCIER_DEVICE_CAP2, 4);
	if ((devcap2 & PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
		return (50 * 1000);

	/* Translate the programmed encoding to its maximum in microseconds. */
	switch (pci_read_config(dev, pcie_off + PCIER_DEVICE_CTL2, 2) &
	    PCIEM_CTL2_COMP_TIMO_VAL) {
	case PCIEM_CTL2_COMP_TIMO_100US:
		timo_us = 100;
		break;
	case PCIEM_CTL2_COMP_TIMO_10MS:
		timo_us = 10 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_55MS:
		timo_us = 55 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_210MS:
		timo_us = 210 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_900MS:
		timo_us = 900 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_3500MS:
		timo_us = 3500 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_13S:
		timo_us = 13 * 1000 * 1000;
		break;
	case PCIEM_CTL2_COMP_TIMO_64S:
		timo_us = 64 * 1000 * 1000;
		break;
	default:
		/* Includes PCIEM_CTL2_COMP_TIMO_50MS and unlisted encodings. */
		timo_us = 50 * 1000;
		break;
	}
	return (timo_us);
}
/*
 * Perform a Function Level Reset (FLR) on a device.
 *
 * Bus mastering is disabled and the function is given up to max_delay
 * milliseconds to finish any pending transactions (see
 * pcie_wait_for_pending_transactions()).  If transactions are still
 * pending when that wait expires:
 *
 *  - with force == false, the original command register value is
 *    written back (re-enabling bus mastering if it was enabled) and
 *    false is returned without attempting a reset;
 *  - with force == true, a warning is printed and the reset is
 *    issued anyway, with the post-reset delay extended by the
 *    device's maximum Completion Timeout (at least 10ms).
 *
 * If dev is not a PCI-express function or does not advertise FLR
 * support in its device capabilities register, this function returns
 * false without touching the device.
 *
 * Returns true once the reset has been requested and the post-reset
 * delay (at least 100ms) has elapsed.  This function may sleep.
 *
 * Note that no registers are saved or restored.  The caller is
 * responsible for saving and restoring any registers including
 * PCI-standard registers via pci_save_state() and
 * pci_restore_state().
 */
bool
pcie_flr(device_t dev, u_int max_delay, bool force)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	uint16_t cmd, ctl;
	int compl_delay;
	int cap;

	cap = dinfo->cfg.pcie.pcie_location;
	if (cap == 0)
		return (false);

	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
		return (false);

	/*
	 * Disable busmastering to prevent generation of new
	 * transactions while waiting for the device to go idle.  If
	 * the idle timeout fails, the command register is restored
	 * which will re-enable busmastering.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
		if (!force) {
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
			return (false);
		}
		/* max_delay is unsigned, so format it with %u. */
		pci_printf(&dinfo->cfg,
		    "Resetting with transactions pending after %u ms\n",
		    max_delay);

		/*
		 * Extend the post-FLR delay to cover the maximum
		 * Completion Timeout delay of anything in flight
		 * during the FLR delay.  Enforce a minimum delay of
		 * at least 10ms.
		 */
		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
		if (compl_delay < 10)
			compl_delay = 10;
	} else
		compl_delay = 0;

	/* Initiate the reset. */
	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
	    PCIEM_CTL_INITIATE_FLR, 2);

	/*
	 * Wait for 100ms, plus the extra completion delay when the reset
	 * was forced with transactions still pending.
	 */
	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);

	/* The reset is still reported as done; a leftover pending bit is only logged. */
	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
	    PCIEM_STA_TRANSACTION_PND)
		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
	return (true);
}

View File

@ -885,10 +885,25 @@
#define PCIEM_ROOT_STA_PME_STATUS 0x00010000
#define PCIEM_ROOT_STA_PME_PEND 0x00020000
#define PCIER_DEVICE_CAP2 0x24
#define PCIEM_CAP2_ARI 0x20
#define PCIEM_CAP2_COMP_TIMO_RANGES 0x0000000f
#define PCIEM_CAP2_COMP_TIMO_RANGE_A 0x00000001
#define PCIEM_CAP2_COMP_TIMO_RANGE_B 0x00000002
#define PCIEM_CAP2_COMP_TIMO_RANGE_C 0x00000004
#define PCIEM_CAP2_COMP_TIMO_RANGE_D 0x00000008
#define PCIEM_CAP2_COMP_TIMO_DISABLE 0x00000010
#define PCIEM_CAP2_ARI 0x00000020
#define PCIER_DEVICE_CTL2 0x28
#define PCIEM_CTL2_COMP_TIMEOUT_VAL 0x000f
#define PCIEM_CTL2_COMP_TIMEOUT_DIS 0x0010
#define PCIEM_CTL2_COMP_TIMO_VAL 0x000f
#define PCIEM_CTL2_COMP_TIMO_50MS 0x0000
#define PCIEM_CTL2_COMP_TIMO_100US 0x0001
#define PCIEM_CTL2_COMP_TIMO_10MS 0x0002
#define PCIEM_CTL2_COMP_TIMO_55MS 0x0005
#define PCIEM_CTL2_COMP_TIMO_210MS 0x0006
#define PCIEM_CTL2_COMP_TIMO_900MS 0x0009
#define PCIEM_CTL2_COMP_TIMO_3500MS 0x000a
#define PCIEM_CTL2_COMP_TIMO_13S 0x000d
#define PCIEM_CTL2_COMP_TIMO_64S 0x000e
#define PCIEM_CTL2_COMP_TIMO_DISABLE 0x0010
#define PCIEM_CTL2_ARI 0x0020
#define PCIEM_CTL2_ATOMIC_REQ_ENABLE 0x0040
#define PCIEM_CTL2_ATOMIC_EGR_BLOCK 0x0080

View File

@ -595,7 +595,9 @@ uint32_t pcie_read_config(device_t dev, int reg, int width);
void pcie_write_config(device_t dev, int reg, uint32_t value, int width);
uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
uint32_t value, int width);
bool pcie_flr(device_t dev, u_int max_delay, bool force);
int pcie_get_max_completion_timeout(device_t dev);
bool pcie_wait_for_pending_transactions(device_t dev, u_int max_delay);
#ifdef BUS_SPACE_MAXADDR
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)