Reset PCI pass through devices via PCI-e FLR during VM start and end.
Add routines to trigger a function level reset (FLR) of a PCI-express device via the PCI-express device control register. This also includes support routines to wait for pending transactions to complete as well as calculating the maximum completion timeout permitted by a device. Change the ppt(4) driver to reset pass through devices before attaching to a VM during startup and before detaching from a VM during shutdown. Reviewed by: imp, wblock (earlier version) MFC after: 1 month Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D7751
This commit is contained in:
parent
b90d9a3752
commit
da0fc9250c
@ -1354,7 +1354,10 @@ MLINKS+=pci.9 pci_alloc_msi.9 \
|
||||
pci.9 pci_set_max_read_req.9 \
|
||||
pci.9 pci_write_config.9 \
|
||||
pci.9 pcie_adjust_config.9 \
|
||||
pci.9 pcie_flr.9 \
|
||||
pci.9 pcie_get_max_completion_timeout.9 \
|
||||
pci.9 pcie_read_config.9 \
|
||||
pci.9 pcie_wait_for_pending_transactions.9 \
|
||||
pci.9 pcie_write_config.9
|
||||
MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \
|
||||
pci_iov_schema.9 pci_iov_schema_add_bool.9 \
|
||||
|
@ -66,7 +66,10 @@
|
||||
.Nm pci_set_powerstate ,
|
||||
.Nm pci_write_config ,
|
||||
.Nm pcie_adjust_config ,
|
||||
.Nm pcie_flr ,
|
||||
.Nm pcie_get_max_completion_timeout ,
|
||||
.Nm pcie_read_config ,
|
||||
.Nm pcie_wait_for_pending_transactions ,
|
||||
.Nm pcie_write_config
|
||||
.Nd PCI bus interface
|
||||
.Sh SYNOPSIS
|
||||
@ -145,8 +148,14 @@
|
||||
.Fa "uint32_t val"
|
||||
.Fa "int width"
|
||||
.Fc
|
||||
.Ft bool
|
||||
.Fn pcie_flr "device_t dev" "u_int max_delay" "bool force"
|
||||
.Ft int
|
||||
.Fn pcie_get_max_completion_timeout "device_t dev"
|
||||
.Ft uint32_t
|
||||
.Fn pcie_read_config "device_t dev" "int reg" "int width"
|
||||
.Ft bool
|
||||
.Fn pcie_wait_for_pending_transactions "device_t dev" "u_int max_delay"
|
||||
.Ft void
|
||||
.Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width"
|
||||
.Ft void
|
||||
@ -431,6 +440,51 @@ keyword,
|
||||
then
|
||||
.Fn pci_get_vpd_readonly
|
||||
returns an error.
|
||||
.Pp
|
||||
The
|
||||
.Fn pcie_get_max_completion_timeout
|
||||
function returns the maximum completion timeout configured for the device
|
||||
.Fa dev
|
||||
in microseconds.
|
||||
If the
|
||||
.Fa dev
|
||||
device is not a PCI-express device,
|
||||
.Fn pcie_get_max_completion_timeout
|
||||
returns zero.
|
||||
When completion timeouts are disabled for
|
||||
.Fa dev ,
|
||||
this function returns the maximum timeout that would be used if timeouts
|
||||
were enabled.
|
||||
.Pp
|
||||
The
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
function waits for any pending transactions initiated by the
|
||||
.Fa dev
|
||||
device to complete.
|
||||
The function checks for pending transactions by polling the transactions
|
||||
pending flag in the PCI-express device status register.
|
||||
It returns
|
||||
.Dv true
|
||||
once the transaction pending flag is clear.
|
||||
If transactions are still pending after
|
||||
.Fa max_delay
|
||||
milliseconds,
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
returns
|
||||
.Dv false .
|
||||
If
|
||||
.Fa max_delay
|
||||
is set to zero,
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
performs a single check;
|
||||
otherwise,
|
||||
this function may sleep while polling the transactions pending flag.
|
||||
.Nm pcie_wait_for_pending_transactions
|
||||
returns
|
||||
.Dv true
|
||||
if
|
||||
.Fa dev
|
||||
is not a PCI-express device.
|
||||
.Ss Device Configuration
|
||||
The
|
||||
.Fn pci_enable_busmaster
|
||||
@ -662,6 +716,51 @@ is invoked,
|
||||
then the device will be transitioned to
|
||||
.Dv PCI_POWERSTATE_D0
|
||||
before any config registers are restored.
|
||||
.Pp
|
||||
The
|
||||
.Fn pcie_flr
|
||||
function requests a Function Level Reset
|
||||
.Pq FLR
|
||||
of
|
||||
.Fa dev .
|
||||
If
|
||||
.Fa dev
|
||||
is not a PCI-express device or does not support Function Level Resets via
|
||||
the PCI-express device control register,
|
||||
.Dv false
|
||||
is returned.
|
||||
Pending transactions are drained by disabling busmastering and calling
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
before resetting the device.
|
||||
The
|
||||
.Fa max_delay
|
||||
argument specifies the maximum timeout to wait for pending transactions as
|
||||
described for
|
||||
.Fn pcie_wait_for_pending_transactions .
|
||||
If
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
fails with a timeout and
|
||||
.Fa force
|
||||
is
|
||||
.Dv false ,
|
||||
busmastering is re-enabled and
|
||||
.Dv false
|
||||
is returned.
|
||||
If
|
||||
.Fn pcie_wait_for_pending_transactions
|
||||
fails with a timeout and
|
||||
.Fa force
|
||||
is
|
||||
.Dv true ,
|
||||
the device is reset despite the timeout.
|
||||
After the reset has been requested,
|
||||
.Nm pcie_flr
|
||||
sleeps for at least 100 milliseconds before returning
|
||||
.Dv true .
|
||||
Note that
|
||||
.Nm pcie_flr
|
||||
does not save and restore any state around the reset.
|
||||
The caller should save and restore state as needed.
|
||||
.Ss Message Signaled Interrupts
|
||||
Message Signaled Interrupts
|
||||
.Pq MSI
|
||||
|
@ -362,6 +362,11 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func)
|
||||
if (ppt->vm != NULL && ppt->vm != vm)
|
||||
return (EBUSY);
|
||||
|
||||
pci_save_state(ppt->dev);
|
||||
pcie_flr(ppt->dev,
|
||||
max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
|
||||
true);
|
||||
pci_restore_state(ppt->dev);
|
||||
ppt->vm = vm;
|
||||
iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev));
|
||||
iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
|
||||
@ -382,6 +387,12 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
|
||||
*/
|
||||
if (ppt->vm != vm)
|
||||
return (EBUSY);
|
||||
|
||||
pci_save_state(ppt->dev);
|
||||
pcie_flr(ppt->dev,
|
||||
max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
|
||||
true);
|
||||
pci_restore_state(ppt->dev);
|
||||
ppt_unmap_mmio(vm, ppt);
|
||||
ppt_teardown_msi(ppt);
|
||||
ppt_teardown_msix(ppt);
|
||||
|
@ -5892,3 +5892,165 @@ pci_find_pcie_root_port(device_t dev)
|
||||
dev = pcib;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for pending transactions to complete on a PCI-express function.
|
||||
*
|
||||
* The maximum delay is specified in milliseconds in max_delay. Note
|
||||
* that this function may sleep.
|
||||
*
|
||||
* Returns true if the function is idle and false if the timeout is
|
||||
* exceeded. If dev is not a PCI-express function, this returns true.
|
||||
*/
|
||||
bool
|
||||
pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
|
||||
{
|
||||
struct pci_devinfo *dinfo = device_get_ivars(dev);
|
||||
uint16_t sta;
|
||||
int cap;
|
||||
|
||||
cap = dinfo->cfg.pcie.pcie_location;
|
||||
if (cap == 0)
|
||||
return (true);
|
||||
|
||||
sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
|
||||
while (sta & PCIEM_STA_TRANSACTION_PND) {
|
||||
if (max_delay == 0)
|
||||
return (false);
|
||||
|
||||
/* Poll once every 100 milliseconds up to the timeout. */
|
||||
if (max_delay > 100) {
|
||||
pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
|
||||
max_delay -= 100;
|
||||
} else {
|
||||
pause_sbt("pcietp", max_delay * SBT_1MS, 0,
|
||||
C_HARDCLOCK);
|
||||
max_delay = 0;
|
||||
}
|
||||
sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
|
||||
}
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine the maximum Completion Timeout in microseconds.
|
||||
*
|
||||
* For non-PCI-express functions this returns 0.
|
||||
*/
|
||||
int
|
||||
pcie_get_max_completion_timeout(device_t dev)
|
||||
{
|
||||
struct pci_devinfo *dinfo = device_get_ivars(dev);
|
||||
int cap;
|
||||
|
||||
cap = dinfo->cfg.pcie.pcie_location;
|
||||
if (cap == 0)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Functions using the 1.x spec use the default timeout range of
|
||||
* 50 microseconds to 50 milliseconds. Functions that do not
|
||||
* support programmable timeouts also use this range.
|
||||
*/
|
||||
if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
|
||||
(pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
|
||||
PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
|
||||
return (50 * 1000);
|
||||
|
||||
switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
|
||||
PCIEM_CTL2_COMP_TIMO_VAL) {
|
||||
case PCIEM_CTL2_COMP_TIMO_100US:
|
||||
return (100);
|
||||
case PCIEM_CTL2_COMP_TIMO_10MS:
|
||||
return (10 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_55MS:
|
||||
return (55 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_210MS:
|
||||
return (210 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_900MS:
|
||||
return (900 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_3500MS:
|
||||
return (3500 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_13S:
|
||||
return (13 * 1000 * 1000);
|
||||
case PCIEM_CTL2_COMP_TIMO_64S:
|
||||
return (64 * 1000 * 1000);
|
||||
default:
|
||||
return (50 * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a Function Level Reset (FLR) on a device.
|
||||
*
|
||||
* This function first waits for any pending transactions to complete
|
||||
* within the timeout specified by max_delay. If transactions are
|
||||
* still pending, the function will return false without attempting a
|
||||
* reset.
|
||||
*
|
||||
* If dev is not a PCI-express function or does not support FLR, this
|
||||
* function returns false.
|
||||
*
|
||||
* Note that no registers are saved or restored. The caller is
|
||||
* responsible for saving and restoring any registers including
|
||||
* PCI-standard registers via pci_save_state() and
|
||||
* pci_restore_state().
|
||||
*/
|
||||
bool
|
||||
pcie_flr(device_t dev, u_int max_delay, bool force)
|
||||
{
|
||||
struct pci_devinfo *dinfo = device_get_ivars(dev);
|
||||
uint16_t cmd, ctl;
|
||||
int compl_delay;
|
||||
int cap;
|
||||
|
||||
cap = dinfo->cfg.pcie.pcie_location;
|
||||
if (cap == 0)
|
||||
return (false);
|
||||
|
||||
if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
|
||||
return (false);
|
||||
|
||||
/*
|
||||
* Disable busmastering to prevent generation of new
|
||||
* transactions while waiting for the device to go idle. If
|
||||
* the idle timeout fails, the command register is restored
|
||||
* which will re-enable busmastering.
|
||||
*/
|
||||
cmd = pci_read_config(dev, PCIR_COMMAND, 2);
|
||||
pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
|
||||
if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
|
||||
if (!force) {
|
||||
pci_write_config(dev, PCIR_COMMAND, cmd, 2);
|
||||
return (false);
|
||||
}
|
||||
pci_printf(&dinfo->cfg,
|
||||
"Resetting with transactions pending after %d ms\n",
|
||||
max_delay);
|
||||
|
||||
/*
|
||||
* Extend the post-FLR delay to cover the maximum
|
||||
* Completion Timeout delay of anything in flight
|
||||
* during the FLR delay. Enforce a minimum delay of
|
||||
* at least 10ms.
|
||||
*/
|
||||
compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
|
||||
if (compl_delay < 10)
|
||||
compl_delay = 10;
|
||||
} else
|
||||
compl_delay = 0;
|
||||
|
||||
/* Initiate the reset. */
|
||||
ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
|
||||
pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
|
||||
PCIEM_CTL_INITIATE_FLR, 2);
|
||||
|
||||
/* Wait for 100ms. */
|
||||
pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
|
||||
|
||||
if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
|
||||
PCIEM_STA_TRANSACTION_PND)
|
||||
pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
|
||||
return (true);
|
||||
}
|
||||
|
@ -885,10 +885,25 @@
|
||||
#define PCIEM_ROOT_STA_PME_STATUS 0x00010000
|
||||
#define PCIEM_ROOT_STA_PME_PEND 0x00020000
|
||||
#define PCIER_DEVICE_CAP2 0x24
|
||||
#define PCIEM_CAP2_ARI 0x20
|
||||
#define PCIEM_CAP2_COMP_TIMO_RANGES 0x0000000f
|
||||
#define PCIEM_CAP2_COMP_TIMO_RANGE_A 0x00000001
|
||||
#define PCIEM_CAP2_COMP_TIMO_RANGE_B 0x00000002
|
||||
#define PCIEM_CAP2_COMP_TIMO_RANGE_C 0x00000004
|
||||
#define PCIEM_CAP2_COMP_TIMO_RANGE_D 0x00000008
|
||||
#define PCIEM_CAP2_COMP_TIMO_DISABLE 0x00000010
|
||||
#define PCIEM_CAP2_ARI 0x00000020
|
||||
#define PCIER_DEVICE_CTL2 0x28
|
||||
#define PCIEM_CTL2_COMP_TIMEOUT_VAL 0x000f
|
||||
#define PCIEM_CTL2_COMP_TIMEOUT_DIS 0x0010
|
||||
#define PCIEM_CTL2_COMP_TIMO_VAL 0x000f
|
||||
#define PCIEM_CTL2_COMP_TIMO_50MS 0x0000
|
||||
#define PCIEM_CTL2_COMP_TIMO_100US 0x0001
|
||||
#define PCIEM_CTL2_COMP_TIMO_10MS 0x0002
|
||||
#define PCIEM_CTL2_COMP_TIMO_55MS 0x0005
|
||||
#define PCIEM_CTL2_COMP_TIMO_210MS 0x0006
|
||||
#define PCIEM_CTL2_COMP_TIMO_900MS 0x0009
|
||||
#define PCIEM_CTL2_COMP_TIMO_3500MS 0x000a
|
||||
#define PCIEM_CTL2_COMP_TIMO_13S 0x000d
|
||||
#define PCIEM_CTL2_COMP_TIMO_64S 0x000e
|
||||
#define PCIEM_CTL2_COMP_TIMO_DISABLE 0x0010
|
||||
#define PCIEM_CTL2_ARI 0x0020
|
||||
#define PCIEM_CTL2_ATOMIC_REQ_ENABLE 0x0040
|
||||
#define PCIEM_CTL2_ATOMIC_EGR_BLOCK 0x0080
|
||||
|
@ -595,7 +595,9 @@ uint32_t pcie_read_config(device_t dev, int reg, int width);
|
||||
void pcie_write_config(device_t dev, int reg, uint32_t value, int width);
|
||||
uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
|
||||
uint32_t value, int width);
|
||||
|
||||
bool pcie_flr(device_t dev, u_int max_delay, bool force);
|
||||
int pcie_get_max_completion_timeout(device_t dev);
|
||||
bool pcie_wait_for_pending_transactions(device_t dev, u_int max_delay);
|
||||
|
||||
#ifdef BUS_SPACE_MAXADDR
|
||||
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
|
||||
|
Loading…
x
Reference in New Issue
Block a user