igb_uio: fix interrupt enablement after FLR in VM

If pass-through a VF by vfio-pci to a Qemu VM, after FLR
in VM, the interrupt setting is not recoverd correctly
to host as below:
 in VM guest:
        Capabilities: [70] MSI-X: Enable+ Count=5 Masked-
 in Host:
        Capabilities: [70] MSI-X: Enable+ Count=5 Masked-

That was because in pci_reset_function, it first reads the
PCI configure and set FLR reset, and then writes PCI configure
as restoration. But not all the writing are successful to Host.
Because vfio-pci driver doesn't allow directly write PCI MSI-X
Cap.

To fix this issue, we need to move the interrupt enablement from
igb_uio probe to open device file. While it is also the similar as
the behaviour in vfio_pci kernel module code.

Fixes: b58eedfc7dd5 ("igb_uio: issue FLR during open and release of device file")
Cc: stable@dpdk.org

Signed-off-by: Jingjing Wu <jingjing.wu@intel.com>
Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
Tested-by: Shijith Thotton <shijith.thotton@caviumnetworks.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
This commit is contained in:
Jingjing Wu 2017-10-10 06:09:20 +08:00 committed by Ferruh Yigit
parent 8cacf78469
commit 5f6ff30dc5

View File

@ -49,7 +49,6 @@ struct rte_uio_pci_dev {
static char *intr_mode; static char *intr_mode;
static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
/* sriov sysfs */ /* sriov sysfs */
static ssize_t static ssize_t
show_max_vfs(struct device *dev, struct device_attribute *attr, show_max_vfs(struct device *dev, struct device_attribute *attr,
@ -208,8 +207,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
* If yes, disable it here and will be enable later. * If yes, disable it here and will be enable later.
*/ */
static irqreturn_t static irqreturn_t
igbuio_pci_irqhandler(int irq, struct uio_info *info) igbuio_pci_irqhandler(int irq, void *dev_id)
{ {
struct uio_device *idev = (struct uio_device *)dev_id;
struct uio_info *info = idev->info;
struct rte_uio_pci_dev *udev = info->priv; struct rte_uio_pci_dev *udev = info->priv;
/* Legacy mode need to mask in hardware */ /* Legacy mode need to mask in hardware */
@ -217,10 +218,115 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
!pci_check_and_mask_intx(udev->pdev)) !pci_check_and_mask_intx(udev->pdev))
return IRQ_NONE; return IRQ_NONE;
uio_event_notify(info);
/* Message signal mode, no share IRQ and automasked */ /* Message signal mode, no share IRQ and automasked */
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static int
igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
{
int err = 0;
#ifndef HAVE_ALLOC_IRQ_VECTORS
struct msix_entry msix_entry;
#endif
switch (igbuio_intr_mode_preferred) {
case RTE_INTR_MODE_MSIX:
/* Only 1 msi-x vector needed */
#ifndef HAVE_ALLOC_IRQ_VECTORS
msix_entry.entry = 0;
if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
dev_dbg(&udev->pdev->dev, "using MSI-X");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = msix_entry.vector;
udev->mode = RTE_INTR_MODE_MSIX;
break;
}
#else
if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
dev_dbg(&udev->pdev->dev, "using MSI-X");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = pci_irq_vector(udev->pdev, 0);
udev->mode = RTE_INTR_MODE_MSIX;
break;
}
#endif
/* fall back to MSI */
case RTE_INTR_MODE_MSI:
#ifndef HAVE_ALLOC_IRQ_VECTORS
if (pci_enable_msi(udev->pdev) == 0) {
dev_dbg(&udev->pdev->dev, "using MSI");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = udev->pdev->irq;
udev->mode = RTE_INTR_MODE_MSI;
break;
}
#else
if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
dev_dbg(&udev->pdev->dev, "using MSI");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = pci_irq_vector(udev->pdev, 0);
udev->mode = RTE_INTR_MODE_MSI;
break;
}
#endif
/* fall back to INTX */
case RTE_INTR_MODE_LEGACY:
if (pci_intx_mask_supported(udev->pdev)) {
dev_dbg(&udev->pdev->dev, "using INTX");
udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
udev->info.irq = udev->pdev->irq;
udev->mode = RTE_INTR_MODE_LEGACY;
break;
}
dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
/* fall back to no IRQ */
case RTE_INTR_MODE_NONE:
udev->mode = RTE_INTR_MODE_NONE;
udev->info.irq = UIO_IRQ_NONE;
break;
default:
dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
igbuio_intr_mode_preferred);
udev->info.irq = UIO_IRQ_NONE;
err = -EINVAL;
}
if (udev->info.irq != UIO_IRQ_NONE)
err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
udev->info.irq_flags, udev->info.name,
udev->info.uio_dev);
dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n",
udev->info.irq);
return err;
}
static void
igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
{
if (udev->info.irq) {
free_irq(udev->info.irq, udev->info.uio_dev);
udev->info.irq = 0;
}
#ifndef HAVE_ALLOC_IRQ_VECTORS
if (udev->mode == RTE_INTR_MODE_MSIX)
pci_disable_msix(udev->pdev);
if (udev->mode == RTE_INTR_MODE_MSI)
pci_disable_msi(udev->pdev);
#else
if (udev->mode == RTE_INTR_MODE_MSIX ||
udev->mode == RTE_INTR_MODE_MSI)
pci_free_irq_vectors(udev->pdev);
#endif
}
/** /**
* This gets called while opening uio device file. * This gets called while opening uio device file.
*/ */
@ -229,12 +335,19 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode)
{ {
struct rte_uio_pci_dev *udev = info->priv; struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev; struct pci_dev *dev = udev->pdev;
int err;
pci_reset_function(dev); pci_reset_function(dev);
/* set bus master, which was cleared by the reset function */ /* set bus master, which was cleared by the reset function */
pci_set_master(dev); pci_set_master(dev);
/* enable interrupts */
err = igbuio_pci_enable_interrupts(udev);
if (err) {
dev_err(&dev->dev, "Enable interrupt fails\n");
return err;
}
return 0; return 0;
} }
@ -244,6 +357,9 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
struct rte_uio_pci_dev *udev = info->priv; struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev; struct pci_dev *dev = udev->pdev;
/* disable interrupts */
igbuio_pci_disable_interrupts(udev);
/* stop the device from further DMA */ /* stop the device from further DMA */
pci_clear_master(dev); pci_clear_master(dev);
@ -313,94 +429,6 @@ igbuio_pci_release_iomem(struct uio_info *info)
} }
} }
static int
igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
{
int err = 0;
#ifndef HAVE_ALLOC_IRQ_VECTORS
struct msix_entry msix_entry;
#endif
switch (igbuio_intr_mode_preferred) {
case RTE_INTR_MODE_MSIX:
/* Only 1 msi-x vector needed */
#ifndef HAVE_ALLOC_IRQ_VECTORS
msix_entry.entry = 0;
if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
dev_dbg(&udev->pdev->dev, "using MSI-X");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = msix_entry.vector;
udev->mode = RTE_INTR_MODE_MSIX;
break;
}
#else
if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
dev_dbg(&udev->pdev->dev, "using MSI-X");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = pci_irq_vector(udev->pdev, 0);
udev->mode = RTE_INTR_MODE_MSIX;
break;
}
#endif
/* fall back to MSI */
case RTE_INTR_MODE_MSI:
#ifndef HAVE_ALLOC_IRQ_VECTORS
if (pci_enable_msi(udev->pdev) == 0) {
dev_dbg(&udev->pdev->dev, "using MSI");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = udev->pdev->irq;
udev->mode = RTE_INTR_MODE_MSI;
break;
}
#else
if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
dev_dbg(&udev->pdev->dev, "using MSI");
udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = pci_irq_vector(udev->pdev, 0);
udev->mode = RTE_INTR_MODE_MSI;
break;
}
#endif
/* fall back to INTX */
case RTE_INTR_MODE_LEGACY:
if (pci_intx_mask_supported(udev->pdev)) {
dev_dbg(&udev->pdev->dev, "using INTX");
udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
udev->info.irq = udev->pdev->irq;
udev->mode = RTE_INTR_MODE_LEGACY;
break;
}
dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
/* fall back to no IRQ */
case RTE_INTR_MODE_NONE:
udev->mode = RTE_INTR_MODE_NONE;
udev->info.irq = UIO_IRQ_NONE;
break;
default:
dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
igbuio_intr_mode_preferred);
err = -EINVAL;
}
return err;
}
static void
igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
{
#ifndef HAVE_ALLOC_IRQ_VECTORS
if (udev->mode == RTE_INTR_MODE_MSIX)
pci_disable_msix(udev->pdev);
if (udev->mode == RTE_INTR_MODE_MSI)
pci_disable_msi(udev->pdev);
#else
if (udev->mode == RTE_INTR_MODE_MSIX ||
udev->mode == RTE_INTR_MODE_MSI)
pci_free_irq_vectors(udev->pdev);
#endif
}
static int static int
igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
{ {
@ -491,20 +519,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
/* fill uio infos */ /* fill uio infos */
udev->info.name = "igb_uio"; udev->info.name = "igb_uio";
udev->info.version = "0.1"; udev->info.version = "0.1";
udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol; udev->info.irqcontrol = igbuio_pci_irqcontrol;
udev->info.open = igbuio_pci_open; udev->info.open = igbuio_pci_open;
udev->info.release = igbuio_pci_release; udev->info.release = igbuio_pci_release;
udev->info.priv = udev; udev->info.priv = udev;
udev->pdev = dev; udev->pdev = dev;
err = igbuio_pci_enable_interrupts(udev);
if (err != 0)
goto fail_release_iomem;
err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
if (err != 0) if (err != 0)
goto fail_disable_interrupts; goto fail_release_iomem;
/* register uio driver */ /* register uio driver */
err = uio_register_device(&dev->dev, &udev->info); err = uio_register_device(&dev->dev, &udev->info);
@ -513,9 +536,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
pci_set_drvdata(dev, udev); pci_set_drvdata(dev, udev);
dev_info(&dev->dev, "uio device registered with irq %lx\n",
udev->info.irq);
/* /*
* Doing a harmless dma mapping for attaching the device to * Doing a harmless dma mapping for attaching the device to
* the iommu identity mapping if kernel boots with iommu=pt. * the iommu identity mapping if kernel boots with iommu=pt.
@ -541,8 +561,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
fail_remove_group: fail_remove_group:
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
fail_disable_interrupts:
igbuio_pci_disable_interrupts(udev);
fail_release_iomem: fail_release_iomem:
igbuio_pci_release_iomem(&udev->info); igbuio_pci_release_iomem(&udev->info);
pci_disable_device(dev); pci_disable_device(dev);
@ -559,7 +577,6 @@ igbuio_pci_remove(struct pci_dev *dev)
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
uio_unregister_device(&udev->info); uio_unregister_device(&udev->info);
igbuio_pci_disable_interrupts(udev);
igbuio_pci_release_iomem(&udev->info); igbuio_pci_release_iomem(&udev->info);
pci_disable_device(dev); pci_disable_device(dev);
pci_set_drvdata(dev, NULL); pci_set_drvdata(dev, NULL);