e7236a7ddf
Sponsored by: Rubicon Communications, LLC ("Netgate")
614 lines
14 KiB
C
614 lines
14 KiB
C
/*
|
|
* Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/uio.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/rwlock.h>
|
|
#include <sys/selinfo.h>
|
|
#include <sys/poll.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/ioccom.h>
|
|
#include <sys/rman.h>
|
|
#include <sys/tree.h>
|
|
#include <sys/module.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/bitset.h>
|
|
|
|
#include <vm/vm.h>
|
|
#include <vm/vm_param.h>
|
|
#include <vm/vm_extern.h>
|
|
#include <vm/vm_kern.h>
|
|
#include <vm/vm_page.h>
|
|
#include <vm/vm_map.h>
|
|
#include <vm/vm_object.h>
|
|
#include <vm/vm_pager.h>
|
|
|
|
#include <machine/md_var.h>
|
|
|
|
#include <xen/xen-os.h>
|
|
#include <xen/hypervisor.h>
|
|
#include <xen/privcmd.h>
|
|
#include <xen/error.h>
|
|
|
|
/* Malloc type used for the allocations made by this driver. */
MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");

/* Largest number of buffers accepted by a single IOCTL_PRIVCMD_DM_OP. */
#define	MAX_DMOP_BUFFERS	16
|
|
|
|
struct privcmd_map {
|
|
vm_object_t mem;
|
|
vm_size_t size;
|
|
struct resource *pseudo_phys_res;
|
|
int pseudo_phys_res_id;
|
|
vm_paddr_t phys_base_addr;
|
|
boolean_t mapped;
|
|
BITSET_DEFINE_VAR() *err;
|
|
};
|
|
|
|
static d_ioctl_t privcmd_ioctl;
|
|
static d_open_t privcmd_open;
|
|
static d_mmap_single_t privcmd_mmap_single;
|
|
|
|
static struct cdevsw privcmd_devsw = {
|
|
.d_version = D_VERSION,
|
|
.d_ioctl = privcmd_ioctl,
|
|
.d_mmap_single = privcmd_mmap_single,
|
|
.d_open = privcmd_open,
|
|
.d_name = "privcmd",
|
|
};
|
|
|
|
static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
|
|
vm_ooffset_t foff, struct ucred *cred, u_short *color);
|
|
static void privcmd_pg_dtor(void *handle);
|
|
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
|
|
int prot, vm_page_t *mres);
|
|
|
|
static struct cdev_pager_ops privcmd_pg_ops = {
|
|
.cdev_pg_fault = privcmd_pg_fault,
|
|
.cdev_pg_ctor = privcmd_pg_ctor,
|
|
.cdev_pg_dtor = privcmd_pg_dtor,
|
|
};
|
|
|
|
struct per_user_data {
|
|
domid_t dom;
|
|
};
|
|
|
|
/*
 * Newbus device of the driver; recorded by privcmd_probe() and passed to
 * xenmem_alloc()/xenmem_free().  Static storage starts out as NULL.
 */
static device_t privcmd_dev;
|
|
|
|
/*------------------------- Privcmd Pager functions --------------------------*/
|
|
static int
|
|
privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
|
|
vm_ooffset_t foff, struct ucred *cred, u_short *color)
|
|
{
|
|
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
privcmd_pg_dtor(void *handle)
|
|
{
|
|
struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
|
|
struct privcmd_map *map = handle;
|
|
int error __diagused;
|
|
vm_size_t i;
|
|
vm_page_t m;
|
|
|
|
/*
|
|
* Remove the mappings from the used pages. This will remove the
|
|
* underlying p2m bindings in Xen second stage translation.
|
|
*/
|
|
if (map->mapped == true) {
|
|
VM_OBJECT_WLOCK(map->mem);
|
|
retry:
|
|
for (i = 0; i < map->size; i++) {
|
|
m = vm_page_lookup(map->mem, i);
|
|
if (m == NULL)
|
|
continue;
|
|
if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
|
|
goto retry;
|
|
cdev_pager_free_page(map->mem, m);
|
|
}
|
|
VM_OBJECT_WUNLOCK(map->mem);
|
|
|
|
for (i = 0; i < map->size; i++) {
|
|
rm.gpfn = atop(map->phys_base_addr) + i;
|
|
HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
|
|
}
|
|
free(map->err, M_PRIVCMD);
|
|
}
|
|
|
|
error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
|
|
map->pseudo_phys_res);
|
|
KASSERT(error == 0, ("Unable to release memory resource: %d", error));
|
|
|
|
free(map, M_PRIVCMD);
|
|
}
|
|
|
|
static int
|
|
privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
|
|
int prot, vm_page_t *mres)
|
|
{
|
|
struct privcmd_map *map = object->handle;
|
|
vm_pindex_t pidx;
|
|
vm_page_t page;
|
|
|
|
if (map->mapped != true)
|
|
return (VM_PAGER_FAIL);
|
|
|
|
pidx = OFF_TO_IDX(offset);
|
|
if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
|
|
return (VM_PAGER_FAIL);
|
|
|
|
page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
|
|
if (page == NULL)
|
|
return (VM_PAGER_FAIL);
|
|
|
|
KASSERT((page->flags & PG_FICTITIOUS) != 0,
|
|
("not fictitious %p", page));
|
|
KASSERT(vm_page_wired(page), ("page %p not wired", page));
|
|
KASSERT(!vm_page_busied(page), ("page %p is busy", page));
|
|
|
|
vm_page_busy_acquire(page, 0);
|
|
vm_page_valid(page);
|
|
|
|
if (*mres != NULL)
|
|
vm_page_replace(page, object, pidx, *mres);
|
|
else
|
|
vm_page_insert(page, object, pidx);
|
|
*mres = page;
|
|
return (VM_PAGER_OK);
|
|
}
|
|
|
|
/*----------------------- Privcmd char device methods ------------------------*/
|
|
static int
|
|
privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
|
|
vm_object_t *object, int nprot)
|
|
{
|
|
struct privcmd_map *map;
|
|
|
|
map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
|
|
|
|
map->size = OFF_TO_IDX(size);
|
|
map->pseudo_phys_res_id = 0;
|
|
|
|
map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
|
|
&map->pseudo_phys_res_id, size);
|
|
if (map->pseudo_phys_res == NULL) {
|
|
free(map, M_PRIVCMD);
|
|
return (ENOMEM);
|
|
}
|
|
|
|
map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
|
|
map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
|
|
size, nprot, *offset, NULL);
|
|
if (map->mem == NULL) {
|
|
xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
|
|
map->pseudo_phys_res);
|
|
free(map, M_PRIVCMD);
|
|
return (ENOMEM);
|
|
}
|
|
|
|
*object = map->mem;
|
|
|
|
return (0);
|
|
}
|
|
|
|
static struct privcmd_map *
|
|
setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
|
|
{
|
|
vm_map_t map;
|
|
vm_map_entry_t entry;
|
|
vm_object_t mem;
|
|
vm_pindex_t pindex;
|
|
vm_prot_t prot;
|
|
boolean_t wired;
|
|
struct privcmd_map *umap;
|
|
int error;
|
|
|
|
if ((num == 0) || ((addr & PAGE_MASK) != 0))
|
|
return NULL;
|
|
|
|
map = &td->td_proc->p_vmspace->vm_map;
|
|
error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
|
|
&prot, &wired);
|
|
if (error != KERN_SUCCESS || (entry->start != addr) ||
|
|
(entry->end != addr + (num * PAGE_SIZE)))
|
|
return NULL;
|
|
|
|
vm_map_lookup_done(map, entry);
|
|
if ((mem->type != OBJT_MGTDEVICE) ||
|
|
(mem->un_pager.devp.ops != &privcmd_pg_ops))
|
|
return NULL;
|
|
|
|
umap = mem->handle;
|
|
/* Allocate a bitset to store broken page mappings. */
|
|
umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
|
|
|
|
return umap;
|
|
}
|
|
|
|
static int
|
|
privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
|
|
int mode, struct thread *td)
|
|
{
|
|
int error;
|
|
unsigned int i;
|
|
void *data;
|
|
const struct per_user_data *u;
|
|
|
|
error = devfs_get_cdevpriv(&data);
|
|
if (error != 0)
|
|
return (EINVAL);
|
|
/*
|
|
* Constify user-data to prevent unintended changes to the restriction
|
|
* limits.
|
|
*/
|
|
u = data;
|
|
|
|
switch (cmd) {
|
|
case IOCTL_PRIVCMD_HYPERCALL: {
|
|
struct ioctl_privcmd_hypercall *hcall;
|
|
|
|
hcall = (struct ioctl_privcmd_hypercall *)arg;
|
|
|
|
/* Forbid hypercalls if restricted. */
|
|
if (u->dom != DOMID_INVALID) {
|
|
error = EPERM;
|
|
break;
|
|
}
|
|
|
|
#ifdef __amd64__
|
|
/*
|
|
* The hypervisor page table walker will refuse to access
|
|
* user-space pages if SMAP is enabled, so temporary disable it
|
|
* while performing the hypercall.
|
|
*/
|
|
if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
|
|
stac();
|
|
#endif
|
|
error = privcmd_hypercall(hcall->op, hcall->arg[0],
|
|
hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
|
|
#ifdef __amd64__
|
|
if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
|
|
clac();
|
|
#endif
|
|
if (error >= 0) {
|
|
hcall->retval = error;
|
|
error = 0;
|
|
} else {
|
|
error = xen_translate_error(error);
|
|
hcall->retval = 0;
|
|
}
|
|
break;
|
|
}
|
|
case IOCTL_PRIVCMD_MMAPBATCH: {
|
|
struct ioctl_privcmd_mmapbatch *mmap;
|
|
struct xen_add_to_physmap_range add;
|
|
xen_ulong_t *idxs;
|
|
xen_pfn_t *gpfns;
|
|
int *errs;
|
|
unsigned int index;
|
|
struct privcmd_map *umap;
|
|
uint16_t num;
|
|
|
|
mmap = (struct ioctl_privcmd_mmapbatch *)arg;
|
|
|
|
if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
|
|
error = EPERM;
|
|
break;
|
|
}
|
|
|
|
umap = setup_virtual_area(td, mmap->addr, mmap->num);
|
|
if (umap == NULL) {
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
|
|
add.domid = DOMID_SELF;
|
|
add.space = XENMAPSPACE_gmfn_foreign;
|
|
add.foreign_domid = mmap->dom;
|
|
|
|
/*
|
|
* The 'size' field in the xen_add_to_physmap_range only
|
|
* allows for UINT16_MAX mappings in a single hypercall.
|
|
*/
|
|
num = MIN(mmap->num, UINT16_MAX);
|
|
|
|
idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
|
|
gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
|
|
errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
|
|
|
|
set_xen_guest_handle(add.idxs, idxs);
|
|
set_xen_guest_handle(add.gpfns, gpfns);
|
|
set_xen_guest_handle(add.errs, errs);
|
|
|
|
for (index = 0; index < mmap->num; index += num) {
|
|
num = MIN(mmap->num - index, UINT16_MAX);
|
|
add.size = num;
|
|
|
|
error = copyin(&mmap->arr[index], idxs,
|
|
sizeof(idxs[0]) * num);
|
|
if (error != 0)
|
|
goto mmap_out;
|
|
|
|
for (i = 0; i < num; i++)
|
|
gpfns[i] = atop(umap->phys_base_addr +
|
|
(i + index) * PAGE_SIZE);
|
|
|
|
bzero(errs, sizeof(*errs) * num);
|
|
|
|
error = HYPERVISOR_memory_op(
|
|
XENMEM_add_to_physmap_range, &add);
|
|
if (error != 0) {
|
|
error = xen_translate_error(error);
|
|
goto mmap_out;
|
|
}
|
|
|
|
for (i = 0; i < num; i++) {
|
|
if (errs[i] != 0) {
|
|
errs[i] = xen_translate_error(errs[i]);
|
|
|
|
/* Mark the page as invalid. */
|
|
BIT_SET(mmap->num, index + i,
|
|
umap->err);
|
|
}
|
|
}
|
|
|
|
error = copyout(errs, &mmap->err[index],
|
|
sizeof(errs[0]) * num);
|
|
if (error != 0)
|
|
goto mmap_out;
|
|
}
|
|
|
|
umap->mapped = true;
|
|
|
|
mmap_out:
|
|
free(idxs, M_PRIVCMD);
|
|
free(gpfns, M_PRIVCMD);
|
|
free(errs, M_PRIVCMD);
|
|
if (!umap->mapped)
|
|
free(umap->err, M_PRIVCMD);
|
|
|
|
break;
|
|
}
|
|
case IOCTL_PRIVCMD_MMAP_RESOURCE: {
|
|
struct ioctl_privcmd_mmapresource *mmap;
|
|
struct xen_mem_acquire_resource adq;
|
|
xen_pfn_t *gpfns;
|
|
struct privcmd_map *umap;
|
|
|
|
mmap = (struct ioctl_privcmd_mmapresource *)arg;
|
|
|
|
if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
|
|
error = EPERM;
|
|
break;
|
|
}
|
|
|
|
bzero(&adq, sizeof(adq));
|
|
|
|
adq.domid = mmap->dom;
|
|
adq.type = mmap->type;
|
|
adq.id = mmap->id;
|
|
|
|
/* Shortcut for getting the resource size. */
|
|
if (mmap->addr == 0 && mmap->num == 0) {
|
|
error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
|
|
&adq);
|
|
if (error != 0)
|
|
error = xen_translate_error(error);
|
|
else
|
|
mmap->num = adq.nr_frames;
|
|
break;
|
|
}
|
|
|
|
umap = setup_virtual_area(td, mmap->addr, mmap->num);
|
|
if (umap == NULL) {
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
|
|
adq.nr_frames = mmap->num;
|
|
adq.frame = mmap->idx;
|
|
|
|
gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
|
|
for (i = 0; i < mmap->num; i++)
|
|
gpfns[i] = atop(umap->phys_base_addr) + i;
|
|
set_xen_guest_handle(adq.frame_list, gpfns);
|
|
|
|
error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
|
|
if (error != 0)
|
|
error = xen_translate_error(error);
|
|
else
|
|
umap->mapped = true;
|
|
|
|
free(gpfns, M_PRIVCMD);
|
|
if (!umap->mapped)
|
|
free(umap->err, M_PRIVCMD);
|
|
|
|
break;
|
|
}
|
|
case IOCTL_PRIVCMD_DM_OP: {
|
|
const struct ioctl_privcmd_dmop *dmop;
|
|
struct privcmd_dmop_buf *bufs;
|
|
struct xen_dm_op_buf *hbufs;
|
|
|
|
dmop = (struct ioctl_privcmd_dmop *)arg;
|
|
|
|
if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
|
|
error = EPERM;
|
|
break;
|
|
}
|
|
|
|
if (dmop->num == 0)
|
|
break;
|
|
|
|
if (dmop->num > MAX_DMOP_BUFFERS) {
|
|
error = E2BIG;
|
|
break;
|
|
}
|
|
|
|
bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);
|
|
|
|
error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
|
|
if (error != 0) {
|
|
free(bufs, M_PRIVCMD);
|
|
break;
|
|
}
|
|
|
|
hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
|
|
for (i = 0; i < dmop->num; i++) {
|
|
set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
|
|
hbufs[i].size = bufs[i].size;
|
|
}
|
|
|
|
#ifdef __amd64__
|
|
if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
|
|
stac();
|
|
#endif
|
|
error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
|
|
#ifdef __amd64__
|
|
if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
|
|
clac();
|
|
#endif
|
|
if (error != 0)
|
|
error = xen_translate_error(error);
|
|
|
|
free(bufs, M_PRIVCMD);
|
|
free(hbufs, M_PRIVCMD);
|
|
|
|
|
|
break;
|
|
}
|
|
case IOCTL_PRIVCMD_RESTRICT: {
|
|
struct per_user_data *u;
|
|
domid_t dom;
|
|
|
|
dom = *(domid_t *)arg;
|
|
|
|
error = devfs_get_cdevpriv((void **)&u);
|
|
if (error != 0)
|
|
break;
|
|
|
|
if (u->dom != DOMID_INVALID && u->dom != dom) {
|
|
error = -EINVAL;
|
|
break;
|
|
}
|
|
u->dom = dom;
|
|
|
|
break;
|
|
}
|
|
default:
|
|
error = ENOSYS;
|
|
break;
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
user_release(void *arg)
|
|
{
|
|
|
|
free(arg, M_PRIVCMD);
|
|
}
|
|
|
|
static int
|
|
privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
|
|
{
|
|
struct per_user_data *u;
|
|
int error;
|
|
|
|
u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
|
|
u->dom = DOMID_INVALID;
|
|
|
|
/* Assign the allocated per_user_data to this open instance. */
|
|
error = devfs_set_cdevpriv(u, user_release);
|
|
if (error != 0) {
|
|
free(u, M_PRIVCMD);
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*------------------ Private Device Attachment Functions --------------------*/
|
|
static void
|
|
privcmd_identify(driver_t *driver, device_t parent)
|
|
{
|
|
|
|
KASSERT(xen_domain(),
|
|
("Trying to attach privcmd device on non Xen domain"));
|
|
|
|
if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
|
|
panic("unable to attach privcmd user-space device");
|
|
}
|
|
|
|
static int
|
|
privcmd_probe(device_t dev)
|
|
{
|
|
|
|
privcmd_dev = dev;
|
|
device_set_desc(dev, "Xen privileged interface user-space device");
|
|
return (BUS_PROBE_NOWILDCARD);
|
|
}
|
|
|
|
static int
|
|
privcmd_attach(device_t dev)
|
|
{
|
|
|
|
make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
|
|
GID_WHEEL, 0600, "xen/privcmd");
|
|
return (0);
|
|
}
|
|
|
|
/*-------------------- Private Device Attachment Data -----------------------*/
|
|
static device_method_t privcmd_methods[] = {
|
|
DEVMETHOD(device_identify, privcmd_identify),
|
|
DEVMETHOD(device_probe, privcmd_probe),
|
|
DEVMETHOD(device_attach, privcmd_attach),
|
|
|
|
DEVMETHOD_END
|
|
};
|
|
|
|
static driver_t privcmd_driver = {
|
|
"privcmd",
|
|
privcmd_methods,
|
|
0,
|
|
};
|
|
|
|
devclass_t privcmd_devclass;
|
|
|
|
DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
|
|
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
|