Add ioctl to conveniently mmap a PCI device BAR into userspace.

Add the ioctl PCIOCBARMMAP on /dev/pci to conveniently create a
userspace mapping of a PCI device BAR.  This is enormously superior to
reading the BAR value with PCIOCREAD and then trying to mmap /dev/mem,
and should allow the mapped BARs to be activated automatically when
needed in the future.

The current implementation creates a new sg pager for each user mmap
request.  If a pointer (and reference) to a managed device pager were
stored in pci_map, we would be able to revoke all mappings on BAR
deactivation or relocation.  This is related to the unimplemented BAR
activation on mmap, and is postponed for the future.

Discussed with:	imp, jhb
Sponsored by:	The FreeBSD Foundation, Mellanox Technologies
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D15583
This commit is contained in:
Konstantin Belousov 2018-08-01 18:58:24 +00:00
parent c4c9cd8d68
commit 87842989f8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=337053
5 changed files with 231 additions and 3 deletions

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 20, 2017
.Dd June 14, 2018
.Dt PCI 4
.Os
.Sh NAME
@ -333,6 +333,72 @@ The limitations on data width described for
reading registers, above, also apply to writing
.Tn PCI
configuration registers.
.It PCIOCBARMMAP
This
.Xr ioctl 2
command allows userspace processes to
.Xr mmap 2
a memory-mapped PCI BAR into their address space.
The input parameters and results are passed in the
.Va pci_bar_mmap
structure, which has the following fields:
.Bl -tag -width Vt struct pcise pbm_sel
.It Vt void *pbm_map_base
Reports the established mapping base to the caller.
If the
.Dv PCIIO_BAR_MMAP_FIXED
flag was specified, then this field must be filled before the call
with the desired address for the mapping.
.It Vt size_t pbm_map_length
Reports the mapped length of the BAR, in bytes.
Its
.Vt size_t
value is always a multiple of the machine page size.
.It Vt uint64_t pbm_bar_length
Reports the length of the BAR as exposed by the device.
.It Vt int pbm_bar_off
Reports the offset from the mapped base to the start of the
first register in the BAR.
.It Vt struct pcisel pbm_sel
Should be filled before the call.
Describes the device to operate on.
.It Vt int pbm_reg
The BAR index to mmap.
.It Vt int pbm_flags
Flags which augment the operation.
See below.
.It Vt int pbm_memattr
The caching attribute for the mapping.
Typical values are
.Dv VM_MEMATTR_UNCACHEABLE
for control register BARs, and
.Dv VM_MEMATTR_WRITE_COMBINING
for frame buffers.
Regular memory-like BARs should be mapped with the
.Dv VM_MEMATTR_DEFAULT
attribute.
.El
.Pp
Currently defined flags are:
.Bl -tag -width PCIIO_BAR_MMAP_ACTIVATE
.It PCIIO_BAR_MMAP_FIXED
The resulting mapping should be established at the address
specified by the
.Va pbm_map_base
member, otherwise fail.
.It PCIIO_BAR_MMAP_EXCL
Must be used together with
.Dv PCIIO_BAR_MMAP_FIXED .
If the specified base contains already established mappings, the
operation fails instead of implicitly unmapping them.
.It PCIIO_BAR_MMAP_RW
The requested mapping allows both reading and writing.
Without the flag, read-only mapping is established.
Note that it is common for the device registers to have side-effects
even on reads.
.It PCIIO_BAR_MMAP_ACTIVATE
(Unimplemented) If the BAR is not activated, activate it in the course
of mapping.
Currently, an attempt to mmap an inactive BAR results in an error.
.El
.El
.Sh LOADER TUNABLES
Tunables can be set at the

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_ioctl.h>
#include <compat/freebsd32/freebsd32_misc.h>
#include <compat/freebsd32/freebsd32_proto.h>
CTASSERT(sizeof(struct ioc_read_toc_entry32) == 8);
@ -248,6 +249,40 @@ freebsd32_ioctl_pciocgetconf(struct thread *td,
return (error);
}
static int
freebsd32_ioctl_barmmap(struct thread *td,
struct freebsd32_ioctl_args *uap, struct file *fp)
{
struct pci_bar_mmap32 pbm32;
struct pci_bar_mmap pbm;
int error;
error = copyin(uap->data, &pbm32, sizeof(pbm32));
if (error != 0)
return (error);
PTRIN_CP(pbm32, pbm, pbm_map_base);
CP(pbm32, pbm, pbm_sel);
CP(pbm32, pbm, pbm_reg);
CP(pbm32, pbm, pbm_flags);
CP(pbm32, pbm, pbm_memattr);
pbm.pbm_bar_length = PAIR32TO64(uint64_t, pbm32.pbm_bar_length);
error = fo_ioctl(fp, PCIOCBARMMAP, (caddr_t)&pbm, td->td_ucred, td);
if (error == 0) {
PTROUT_CP(pbm, pbm32, pbm_map_base);
CP(pbm, pbm32, pbm_map_length);
#if BYTE_ORDER == LITTLE_ENDIAN
pbm32.pbm_bar_length1 = pbm.pbm_bar_length;
pbm32.pbm_bar_length2 = pbm.pbm_bar_length >> 32;
#else
pbm32.pbm_bar_length1 = pbm.pbm_bar_length >> 32;
pbm32.pbm_bar_length2 = pbm.pbm_bar_length;
#endif
CP(pbm, pbm32, pbm_bar_off);
error = copyout(&pbm32, uap->data, sizeof(pbm32));
}
return (error);
}
static int
freebsd32_ioctl_sg(struct thread *td,
struct freebsd32_ioctl_args *uap, struct file *fp)
@ -355,6 +390,10 @@ freebsd32_ioctl(struct thread *td, struct freebsd32_ioctl_args *uap)
error = freebsd32_ioctl_sg(td, uap, fp);
break;
case PCIOCBARMMAP_32:
error = freebsd32_ioctl_barmmap(td, uap, fp);
break;
default:
fdrop(fp, td);
ap.fd = uap->fd;
@ -364,5 +403,5 @@ freebsd32_ioctl(struct thread *td, struct freebsd32_ioctl_args *uap)
}
fdrop(fp, td);
return error;
return (error);
}

View File

@ -95,11 +95,23 @@ struct pci_conf_io32 {
u_int32_t status; /* request status */
};
/*
 * Layout of the PCIOCBARMMAP argument as passed by 32-bit processes
 * (see PCIOCBARMMAP_32).  The user pointer and the mapping length are
 * 32 bits wide, and the 64-bit BAR length is carried as two 32-bit
 * halves; which half holds the low word depends on the byte order.
 */
struct pci_bar_mmap32 {
	uint32_t	pbm_map_base;		/* 32-bit user address */
	uint32_t	pbm_map_length;
	uint32_t	pbm_bar_length1;	/* first half, memory order */
	uint32_t	pbm_bar_length2;	/* second half, memory order */
	int		pbm_bar_off;
	struct pcisel	pbm_sel;
	int		pbm_reg;
	int		pbm_flags;
	int		pbm_memattr;
};
/*
 * ioctl commands as encoded by 32-bit processes; each one is built from
 * the 32-bit layout of its argument structure.
 */
#define CDIOREADTOCENTRYS_32 _IOWR('c', 5, struct ioc_read_toc_entry32)
#define FIODGNAME_32 _IOW('f', 120, struct fiodgname_arg32)
#define MEMRANGE_GET32 _IOWR('m', 50, struct mem_range_op32)
#define MEMRANGE_SET32 _IOW('m', 51, struct mem_range_op32)
#define PCIOCGETCONF_32 _IOWR('p', 5, struct pci_conf_io32)
#define SG_IO_32 _IOWR(SGIOC, 0x85, struct sg_io_hdr32)
/* Same group and number as the native PCIOCBARMMAP ('p', 8). */
#define PCIOCBARMMAP_32 _IOWR('p', 8, struct pci_bar_mmap32)
#endif /* _COMPAT_FREEBSD32_IOCTL_H_ */

View File

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
#include "opt_bus.h" /* XXX trim includes */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@ -39,13 +40,19 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <sys/rwlock.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <sys/bus.h>
#include <machine/bus.h>
@ -696,6 +703,77 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
}
}
/*
 * Map the memory BAR selected by pbm->pbm_reg of device pcidev into the
 * address space of the calling thread's process.
 *
 * On input pbm supplies the flags (PCIIO_BAR_MMAP_*), the memory
 * attribute for the mapping and, with PCIIO_BAR_MMAP_FIXED, the
 * requested address.  On success pbm_map_base, pbm_map_length,
 * pbm_bar_off and pbm_bar_length are filled in.
 *
 * Returns 0 on success, or:
 *   EINVAL - unknown flags, an invalid memory attribute, or no such BAR;
 *   EBUSY  - the BAR is not enabled (activation is not implemented);
 *   EIO    - the BAR is not a memory BAR, or the pager object could not
 *            be allocated;
 *   otherwise the error from sglist_append_phys() or vm_mmap_object().
 */
static int
pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm)
{
	vm_map_t map;
	vm_object_t obj;
	struct thread *td;
	struct sglist *sg;
	struct pci_map *pm;
	pci_addr_t membase;
	vm_paddr_t pbase;
	vm_size_t plen;
	vm_offset_t addr;
	vm_prot_t prot;
	int error, flags;

	td = curthread;
	map = &td->td_proc->p_vmspace->vm_map;
	if ((pbm->pbm_flags & ~(PCIIO_BAR_MMAP_FIXED | PCIIO_BAR_MMAP_EXCL |
	    PCIIO_BAR_MMAP_RW | PCIIO_BAR_MMAP_ACTIVATE)) != 0 ||
	    pbm->pbm_memattr != (vm_memattr_t)pbm->pbm_memattr ||
	    !pmap_is_valid_memattr(map->pmap, pbm->pbm_memattr))
		return (EINVAL);

	/* Fetch the BAR physical base and length. */
	pm = pci_find_bar(pcidev, pbm->pbm_reg);
	if (pm == NULL)
		return (EINVAL);
	if (!pci_bar_enabled(pcidev, pm))
		return (EBUSY); /* XXXKIB enable if _ACTIVATE */
	if (!PCI_BAR_MEM(pm->pm_value))
		return (EIO);

	/*
	 * The raw BAR value carries the type and prefetchable flags in its
	 * low bits.  Strip them before doing address arithmetic: left in
	 * place they would leak into pbm_bar_off and, for a page-aligned
	 * BAR whose size is a multiple of the page size, push round_page()
	 * one page past the end of the BAR.
	 */
	membase = pm->pm_value & PCIM_BAR_MEM_BASE;
	pbase = trunc_page(membase);
	plen = round_page(membase + ((pci_addr_t)1 << pm->pm_size)) - pbase;
	prot = VM_PROT_READ | (((pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0) ?
	    VM_PROT_WRITE : 0);

	/* Create vm structures and mmap. */
	sg = sglist_alloc(1, M_WAITOK);
	error = sglist_append_phys(sg, pbase, plen);
	if (error != 0)
		goto out;
	obj = vm_pager_allocate(OBJT_SG, sg, plen, prot, 0, td->td_ucred);
	if (obj == NULL) {
		error = EIO;
		goto out;
	}
	obj->memattr = pbm->pbm_memattr;
	flags = MAP_SHARED;
	addr = 0;
	if ((pbm->pbm_flags & PCIIO_BAR_MMAP_FIXED) != 0) {
		addr = (uintptr_t)pbm->pbm_map_base;
		flags |= MAP_FIXED;
	}
	if ((pbm->pbm_flags & PCIIO_BAR_MMAP_EXCL) != 0)
		flags |= MAP_CHECK_EXCL;
	error = vm_mmap_object(map, &addr, plen, prot, prot, flags, obj, 0,
	    FALSE, td);
	if (error != 0) {
		/* The mapping did not consume the object; drop it here. */
		vm_object_deallocate(obj);
		goto out;
	}
	pbm->pbm_map_base = (void *)addr;
	pbm->pbm_map_length = plen;
	pbm->pbm_bar_off = membase - pbase;
	pbm->pbm_bar_length = (pci_addr_t)1 << pm->pm_size;

out:
	/*
	 * Released on success as well as on failure; presumably the pager
	 * object keeps its own reference to the sglist (to be confirmed
	 * against the sg pager).
	 */
	sglist_free(sg);
	return (error);
}
static int
pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
@ -709,6 +787,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
struct pci_list_vpd_io *lvio;
struct pci_match_conf *pattern_buf;
struct pci_map *pm;
struct pci_bar_mmap *pbm;
size_t confsz, iolen;
int error, ionum, i, num_patterns;
union pci_conf_union pcu;
@ -730,6 +809,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
#endif
case PCIOCGETBAR:
case PCIOCLISTVPD:
case PCIOCBARMMAP:
break;
default:
return (EPERM);
@ -1053,6 +1133,18 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
}
error = pci_list_vpd(pcidev, lvio);
break;
case PCIOCBARMMAP:
pbm = (struct pci_bar_mmap *)data;
if ((flag & FWRITE) == 0 &&
(pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0)
return (EPERM);
pcidev = pci_find_dbsf(pbm->pbm_sel.pc_domain,
pbm->pbm_sel.pc_bus, pbm->pbm_sel.pc_dev,
pbm->pbm_sel.pc_func);
error = pcidev == NULL ? ENODEV : pci_bar_mmap(pcidev, pbm);
break;
default:
error = ENOTTY;
break;

View File

@ -138,11 +138,30 @@ struct pci_list_vpd_io {
struct pci_vpd_element *plvi_data;
};
/*
 * Argument of the PCIOCBARMMAP ioctl.  The caller fills pbm_sel, pbm_reg,
 * pbm_flags and pbm_memattr (and pbm_map_base when PCIIO_BAR_MMAP_FIXED
 * is set); on success the kernel fills pbm_map_base, pbm_map_length,
 * pbm_bar_length and pbm_bar_off.
 */
struct pci_bar_mmap {
void *pbm_map_base; /* IN with PCIIO_BAR_MMAP_FIXED: requested
address; OUT: base of the mapping */
size_t pbm_map_length; /* OUT: mapped length of the BAR, multiple
of pages */
uint64_t pbm_bar_length; /* OUT: actual length of the BAR */
int pbm_bar_off; /* OUT: offset from the mapped base to the
start of BAR */
struct pcisel pbm_sel; /* IN: device to operate on */
int pbm_reg; /* IN: BAR to map, as looked up by
pci_find_bar() */
int pbm_flags; /* IN: PCIIO_BAR_MMAP_* flags */
int pbm_memattr; /* IN: caching attribute of the mapping */
};
/* Flags accepted in pci_bar_mmap.pbm_flags. */
/* Establish the mapping at the address given in pbm_map_base. */
#define PCIIO_BAR_MMAP_FIXED 0x01
/* Fail instead of replacing mappings that already exist at the address. */
#define PCIIO_BAR_MMAP_EXCL 0x02
/* Map for reading and writing; without it the mapping is read-only. */
#define PCIIO_BAR_MMAP_RW 0x04
/* Activate an inactive BAR while mapping; accepted but not implemented. */
#define PCIIO_BAR_MMAP_ACTIVATE 0x08
/* ioctl commands in the 'p' group and their argument structures. */
#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io)
#define PCIOCREAD _IOWR('p', 2, struct pci_io)
#define PCIOCWRITE _IOWR('p', 3, struct pci_io)
#define PCIOCATTACHED _IOWR('p', 4, struct pci_io)
#define PCIOCGETBAR _IOWR('p', 6, struct pci_bar_io)
#define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io)
/* Map a memory BAR of a device into the calling process. */
#define PCIOCBARMMAP _IOWR('p', 8, struct pci_bar_mmap)
#endif /* !_SYS_PCIIO_H_ */