Add the MEM_EXTRACT_PADDR ioctl to /dev/mem.
This allows privileged userspace processes to find information about the physical page backing a given mapping. It is useful in applications such as DPDK which perform some of their own memory management.

Reviewed by: kib, jhb (previous version)
MFC after: 2 weeks
Sponsored by: Juniper Networks, Inc.
Sponsored by: Klara Inc.
Differential Revision: https://reviews.freebsd.org/D26237
This commit is contained in:
parent
662c3e2084
commit
2d838cd867
@ -28,7 +28,7 @@
|
||||
.\" @(#)mem.4 5.3 (Berkeley) 5/2/91
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 3, 2004
|
||||
.Dd August 25, 2020
|
||||
.Dt MEM 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -54,11 +54,7 @@ in the same manner as
|
||||
.Pa /dev/mem .
|
||||
Only kernel virtual addresses that are currently mapped to memory are allowed.
|
||||
.Pp
|
||||
On
|
||||
.Tn ISA
|
||||
the
|
||||
.Tn I/O
|
||||
memory space begins at physical address 0x000a0000
|
||||
On ISA the I/O memory space begins at physical address 0x000a0000
|
||||
and runs to 0x00100000.
|
||||
The
|
||||
per-process data
|
||||
@ -69,6 +65,46 @@ is
|
||||
long, and ends at virtual
|
||||
address 0xf0000000.
|
||||
.Sh IOCTL INTERFACE
|
||||
The
|
||||
.Dv MEM_EXTRACT_PADDR
|
||||
ioctl can be used to look up the physical address and NUMA domain of a given
|
||||
virtual address in the calling process' address space.
|
||||
The request is described by
|
||||
.Bd -literal
|
||||
struct mem_extract {
|
||||
uint64_t me_vaddr; /* input */
|
||||
uint64_t me_paddr; /* output */
|
||||
int me_domain; /* output */
|
||||
int me_state; /* output */
|
||||
};
|
||||
.Ed
|
||||
.Pp
|
||||
The ioctl does not fail when the address is not valid; instead, the
.Fa me_state
field is set to
.Dv ME_STATE_INVALID .
|
||||
The information returned by
|
||||
.Dv MEM_EXTRACT_PADDR
|
||||
may be out of date by the time that the ioctl call returns.
|
||||
Specifically, concurrent system calls, page faults, or system page reclamation
|
||||
activity may have unmapped the virtual page or replaced the backing physical
|
||||
page before the ioctl call returns.
|
||||
Wired pages, e.g., those locked by
|
||||
.Xr mlock 2 ,
|
||||
will not be reclaimed by the system.
|
||||
.Pp
|
||||
The
|
||||
.Fa me_state
|
||||
field provides information about the state of the virtual page:
|
||||
.Bl -tag -width indent
|
||||
.It Dv ME_STATE_INVALID
|
||||
The virtual address is invalid.
|
||||
.It Dv ME_STATE_VALID
|
||||
The virtual address is valid but is not mapped at the time of the ioctl call.
|
||||
.It Dv ME_STATE_MAPPED
|
||||
The virtual address corresponds to a physical page mapping, and the
|
||||
.Fa me_paddr
|
||||
and
|
||||
.Fa me_domain
|
||||
fields are valid.
|
||||
.El
.Pp
|
||||
Several architectures allow attributes to be associated with ranges of physical
|
||||
memory.
|
||||
These attributes can be manipulated via
|
||||
@ -95,12 +131,13 @@ The region cannot be written to.
|
||||
.El
|
||||
.Pp
|
||||
Memory ranges are described by
|
||||
.Vt struct mem_range_desc :
|
||||
.Bd -literal -offset indent
|
||||
uint64_t mr_base; /\(** physical base address \(**/
|
||||
uint64_t mr_len; /\(** physical length of region \(**/
|
||||
int mr_flags; /\(** attributes of region \(**/
|
||||
char mr_owner[8];
|
||||
.Bd -literal
|
||||
struct mem_range_desc {
|
||||
uint64_t mr_base; /* physical base address */
|
||||
uint64_t mr_len; /* physical length of region */
|
||||
int mr_flags; /* attributes of region */
|
||||
char mr_owner[8];
|
||||
};
|
||||
.Ed
|
||||
.Pp
|
||||
In addition to the region attributes listed above, the following flags
|
||||
@ -126,10 +163,11 @@ altered.
|
||||
.El
|
||||
.Pp
|
||||
Operations are performed using
|
||||
.Fa struct mem_range_op :
|
||||
.Bd -literal -offset indent
|
||||
struct mem_range_desc *mo_desc;
|
||||
int mo_arg[2];
|
||||
.Bd -literal
|
||||
struct mem_range_op {
|
||||
struct mem_range_desc *mo_desc;
|
||||
int mo_arg[2];
|
||||
};
|
||||
.Ed
|
||||
.Pp
|
||||
The
|
||||
@ -165,7 +203,7 @@ to remove a range.
|
||||
.It Bq Er EOPNOTSUPP
|
||||
Memory range operations are not supported on this architecture.
|
||||
.It Bq Er ENXIO
|
||||
No memory range descriptors are available (e.g.\& firmware has not enabled
|
||||
No memory range descriptors are available (e.g., firmware has not enabled
|
||||
any).
|
||||
.It Bq Er EINVAL
|
||||
The memory range supplied as an argument is invalid or overlaps another
|
||||
@ -174,7 +212,7 @@ range in a fashion not supported by this architecture.
|
||||
An attempt to remove or update a range failed because the range is busy.
|
||||
.It Bq Er ENOSPC
|
||||
An attempt to create a new range failed due to a shortage of hardware
|
||||
resources (e.g.\& descriptor slots).
|
||||
resources (e.g., descriptor slots).
|
||||
.It Bq Er ENOENT
|
||||
An attempt to remove a range failed because no range matches the descriptor
|
||||
base/length supplied.
|
||||
|
@ -185,9 +185,8 @@ memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
* This is basically just an ioctl shim for mem_range_attr_get
|
||||
* and mem_range_attr_set.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
memioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
struct thread *td)
|
||||
{
|
||||
int nd, error = 0;
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
d_ioctl_t memioctl;
|
||||
d_ioctl_t memioctl_md;
|
||||
d_mmap_t memmmap;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -172,3 +172,10 @@ memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd __unused,
|
||||
caddr_t data __unused, int flags __unused, struct thread *td __unused)
|
||||
{
|
||||
return (ENOTTY);
|
||||
}
|
||||
|
@ -37,6 +37,6 @@
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
d_mmap_t memmmap;
|
||||
#define memioctl (d_ioctl_t *)NULL
|
||||
d_ioctl_t memioctl_md;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -129,3 +129,10 @@ memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd __unused,
|
||||
caddr_t data __unused, int flags __unused, struct thread *td __unused)
|
||||
{
|
||||
return (ENOTTY);
|
||||
}
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
#define memioctl (d_ioctl_t *)NULL
|
||||
d_ioctl_t memioctl_md;
|
||||
d_mmap_t memmmap;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/param.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
@ -46,12 +47,19 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_phys.h>
|
||||
|
||||
#include <machine/memdev.h>
|
||||
|
||||
static struct cdev *memdev, *kmemdev;
|
||||
|
||||
static d_ioctl_t memioctl;
|
||||
|
||||
static struct cdevsw mem_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_flags = D_MEM,
|
||||
@ -82,6 +90,43 @@ memopen(struct cdev *dev __unused, int flags, int fmt __unused,
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
|
||||
struct thread *td)
|
||||
{
|
||||
vm_map_t map;
|
||||
vm_map_entry_t entry;
|
||||
struct mem_extract *me;
|
||||
int error;
|
||||
|
||||
error = 0;
|
||||
switch (cmd) {
|
||||
case MEM_EXTRACT_PADDR:
|
||||
me = (struct mem_extract *)data;
|
||||
|
||||
map = &td->td_proc->p_vmspace->vm_map;
|
||||
vm_map_lock_read(map);
|
||||
if (vm_map_lookup_entry(map, me->me_vaddr, &entry)) {
|
||||
me->me_paddr = pmap_extract(
|
||||
&td->td_proc->p_vmspace->vm_pmap, me->me_vaddr);
|
||||
if (me->me_paddr != 0) {
|
||||
me->me_state = ME_STATE_MAPPED;
|
||||
me->me_domain = _vm_phys_domain(me->me_paddr);
|
||||
} else {
|
||||
me->me_state = ME_STATE_VALID;
|
||||
}
|
||||
} else {
|
||||
me->me_state = ME_STATE_INVALID;
|
||||
}
|
||||
vm_map_unlock_read(map);
|
||||
break;
|
||||
default:
|
||||
error = memioctl_md(dev, cmd, data, flags, td);
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
mem_modevent(module_t mod __unused, int type, void *data __unused)
|
||||
|
@ -176,9 +176,8 @@ memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
* This is basically just an ioctl shim for mem_range_attr_get
|
||||
* and mem_range_attr_set.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
memioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
struct thread *td)
|
||||
{
|
||||
int nd, error = 0;
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
d_ioctl_t memioctl;
|
||||
d_ioctl_t memioctl_md;
|
||||
d_mmap_t memmmap;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -37,7 +37,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
#define memioctl (d_ioctl_t *)NULL
|
||||
d_ioctl_t memioctl_md;
|
||||
d_mmap_t memmmap;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -160,3 +160,10 @@ memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd __unused,
|
||||
caddr_t data __unused, int flags __unused, struct thread *td __unused)
|
||||
{
|
||||
return (ENOTTY);
|
||||
}
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
d_ioctl_t memioctl;
|
||||
d_ioctl_t memioctl_md;
|
||||
d_mmap_t memmmap;
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -278,9 +278,8 @@ ppc_mrset(struct mem_range_softc *sc, struct mem_range_desc *desc, int *arg)
|
||||
* This is basically just an ioctl shim for mem_range_attr_get
|
||||
* and mem_range_attr_set.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
memioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
|
||||
struct thread *td)
|
||||
{
|
||||
int nd, error = 0;
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
d_open_t memopen;
|
||||
d_read_t memrw;
|
||||
#define memioctl (d_ioctl_t *)NULL
|
||||
d_ioctl_t memioctl_md;
|
||||
#define memmmap (d_mmap_t *)NULL
|
||||
|
||||
#endif /* _MACHINE_MEMDEV_H_ */
|
||||
|
@ -121,3 +121,10 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
memioctl_md(struct cdev *dev __unused, u_long cmd __unused,
|
||||
caddr_t data __unused, int flags __unused, struct thread *td __unused)
|
||||
{
|
||||
return (ENOTTY);
|
||||
}
|
||||
|
@ -45,6 +45,20 @@ struct mem_range_op
|
||||
#define MEMRANGE_GET _IOWR('m', 50, struct mem_range_op)
|
||||
#define MEMRANGE_SET _IOW('m', 51, struct mem_range_op)
|
||||
|
||||
#define ME_STATE_INVALID 0
|
||||
#define ME_STATE_VALID 1
|
||||
#define ME_STATE_MAPPED 2
|
||||
|
||||
/*
 * Argument block for the MEM_EXTRACT_PADDR ioctl.  The caller fills in
 * me_vaddr; the remaining fields are written by the handler.  The size of
 * this structure is part of the ioctl request encoding, so its layout
 * must not change.
 */
struct mem_extract {
|
||||
uint64_t me_vaddr; /* input: virtual address to look up */
|
||||
uint64_t me_paddr; /* output: physical address backing me_vaddr */
|
||||
int me_domain; /* output: domain of me_paddr; set only when ME_STATE_MAPPED */
|
||||
int me_state; /* output: one of the ME_STATE_* values */
|
||||
uint64_t pad1[5]; /* not used by the handler; presumably reserved - confirm */
|
||||
};
|
||||
|
||||
#define MEM_EXTRACT_PADDR _IOWR('m', 52, struct mem_extract)
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
MALLOC_DECLARE(M_MEMDESC);
|
||||
|
Loading…
Reference in New Issue
Block a user