Implement proper support for memory map operations in the LinuxKPI,
like open, close and fault using the character device pager. Some notes about the implementation: 1) Linux drivers set the vm_ops and vm_private_data fields during a mmap() call to indicate that the driver wants to use the LinuxKPI VM operations. Else these operations are not used. 2) The vm_private_data pointer is associated with a VM area structure and inserted into an internal LinuxKPI list. If the vm_private_data pointer already exists, the existing VM area structure is used instead of the allocated one which gets freed. 3) The LinuxKPI's vm_private_data pointer is used as the callback handle for the FreeBSD VM object. The VM subsystem in FreeBSD has a similar list to identify equal handles and will only call the character device pager's close function once. 4) All LinuxKPI VM operations are serialized through the mmap_sem semaphore, which is per process, which prevents simultaneous access to the shared VM area structure when receiving page faults. Obtained from: kmacy @ MFC after: 1 week Sponsored by: Mellanox Technologies
This commit is contained in:
parent
e54b103e70
commit
1ea4c85781
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=316562
@ -38,6 +38,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
@ -89,12 +90,25 @@ CTASSERT((VM_PROT_ALL & -(1 << 8)) == 0);
|
||||
typedef int (*pte_fn_t)(pte_t *, pgtable_t, unsigned long addr, void *data);
|
||||
|
||||
struct vm_area_struct {
|
||||
vm_offset_t vm_start;
|
||||
vm_offset_t vm_end;
|
||||
vm_offset_t vm_pgoff;
|
||||
vm_paddr_t vm_pfn; /* PFN For mmap. */
|
||||
vm_size_t vm_len; /* length for mmap. */
|
||||
vm_memattr_t vm_page_prot;
|
||||
vm_offset_t vm_start;
|
||||
vm_offset_t vm_end;
|
||||
vm_offset_t vm_pgoff;
|
||||
pgprot_t vm_page_prot;
|
||||
unsigned long vm_flags;
|
||||
struct mm_struct *vm_mm;
|
||||
void *vm_private_data;
|
||||
const struct vm_operations_struct *vm_ops;
|
||||
struct linux_file *vm_file;
|
||||
|
||||
/* internal operation */
|
||||
vm_paddr_t vm_pfn; /* PFN for memory map */
|
||||
vm_size_t vm_len; /* length for memory map */
|
||||
vm_pindex_t vm_pfn_first;
|
||||
int vm_pfn_count;
|
||||
int *vm_pfn_pcount;
|
||||
vm_object_t vm_obj;
|
||||
vm_map_t vm_cached_map;
|
||||
TAILQ_ENTRY(vm_area_struct) vm_entry;
|
||||
};
|
||||
|
||||
struct vm_fault {
|
||||
|
@ -47,6 +47,28 @@ typedef unsigned long pgprot_t;
|
||||
|
||||
#define page vm_page
|
||||
|
||||
#define LINUXKPI_PROT_VALID (1 << 4)
|
||||
#define LINUXKPI_CACHE_MODE_SHIFT 3
|
||||
|
||||
static inline pgprot_t
|
||||
cachemode2protval(vm_memattr_t attr)
|
||||
{
|
||||
return ((attr | LINUXKPI_PROT_VALID) << LINUXKPI_CACHE_MODE_SHIFT);
|
||||
}
|
||||
|
||||
static inline vm_memattr_t
|
||||
pgprot2cachemode(pgprot_t prot)
|
||||
{
|
||||
int val;
|
||||
|
||||
val = prot >> LINUXKPI_CACHE_MODE_SHIFT;
|
||||
|
||||
if (val & LINUXKPI_PROT_VALID)
|
||||
return (val & ~LINUXKPI_PROT_VALID);
|
||||
else
|
||||
return (VM_MEMATTR_DEFAULT);
|
||||
}
|
||||
|
||||
#define virt_to_page(x) PHYS_TO_VM_PAGE(vtophys((x)))
|
||||
#define page_to_pfn(pp) (VM_PAGE_TO_PHYS((pp)) >> PAGE_SHIFT)
|
||||
#define pfn_to_page(pfn) (PHYS_TO_VM_PAGE((pfn) << PAGE_SHIFT))
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2010 Isilon Systems, Inc.
|
||||
* Copyright (c) 2010 iX Systems, Inc.
|
||||
* Copyright (c) 2010 Panasas, Inc.
|
||||
* Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
|
||||
* Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -88,6 +88,8 @@ MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");
|
||||
#undef cdev
|
||||
#define RB_ROOT(head) (head)->rbh_root
|
||||
|
||||
static struct vm_area_struct *linux_cdev_handle_find(void *handle);
|
||||
|
||||
struct kobject linux_class_root;
|
||||
struct device linux_root_device;
|
||||
struct class linux_class_misc;
|
||||
@ -393,6 +395,166 @@ linux_file_dtor(void *cdp)
|
||||
kfree(filp);
|
||||
}
|
||||
|
||||
/*
 * Character-device pager populate callback.  Dispatches a FreeBSD page
 * fault on a LinuxKPI-managed object to the Linux driver's
 * vm_ops->fault() handler and translates the Linux VM_FAULT_* result
 * into a VM_PAGER_* code for the FreeBSD VM system.
 *
 * Called with the object write-locked; the lock is dropped around the
 * driver callback (which may sleep) and re-taken before returning.
 */
static int
linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type,
    vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
{
	struct vm_area_struct *vmap;
	struct vm_fault vmf;
	int err;

	linux_set_current(curthread);

	/* get VM area structure */
	vmap = linux_cdev_handle_find(vm_obj->handle);
	MPASS(vmap != NULL);
	MPASS(vmap->vm_private_data == vm_obj->handle);

	/* fill out VM fault structure */
	vmf.virtual_address = (void *)(pidx << PAGE_SHIFT);
	vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	vmf.pgoff = 0;
	vmf.page = NULL;

	/* drop the object lock before calling into the driver */
	VM_OBJECT_WUNLOCK(vm_obj);

	/* serialize against other LinuxKPI VM operations on this map */
	down_write(&vmap->vm_mm->mmap_sem);
	if (unlikely(vmap->vm_ops == NULL)) {
		err = VM_FAULT_SIGBUS;
	} else {
		/* per-fault bookkeeping consumed by the fault handler */
		vmap->vm_pfn_count = 0;
		vmap->vm_pfn_pcount = &vmap->vm_pfn_count;
		vmap->vm_obj = vm_obj;

		err = vmap->vm_ops->fault(vmap, &vmf);

		/* NOPAGE with no pages installed yet: yield and retry */
		while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) {
			kern_yield(0);
			err = vmap->vm_ops->fault(vmap, &vmf);
		}
	}

	/* translate return code */
	switch (err) {
	case VM_FAULT_OOM:
		err = VM_PAGER_AGAIN;
		break;
	case VM_FAULT_SIGBUS:
		err = VM_PAGER_BAD;
		break;
	case VM_FAULT_NOPAGE:
		/*
		 * By contract the fault handler will return having
		 * busied all the pages itself. If pidx is already
		 * found in the object, it will simply xbusy the first
		 * page and return with vm_pfn_count set to 1.
		 */
		*first = vmap->vm_pfn_first;
		*last = *first + vmap->vm_pfn_count - 1;
		err = VM_PAGER_OK;
		break;
	default:
		err = VM_PAGER_ERROR;
		break;
	}
	up_write(&vmap->vm_mm->mmap_sem);
	VM_OBJECT_WLOCK(vm_obj);
	return (err);
}
|
||||
|
||||
static struct rwlock linux_vma_lock;
|
||||
static TAILQ_HEAD(, vm_area_struct) linux_vma_head =
|
||||
TAILQ_HEAD_INITIALIZER(linux_vma_head);
|
||||
|
||||
static struct vm_area_struct *
|
||||
linux_cdev_handle_insert(void *handle, struct vm_area_struct *vmap)
|
||||
{
|
||||
struct vm_area_struct *ptr;
|
||||
|
||||
rw_wlock(&linux_vma_lock);
|
||||
TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) {
|
||||
if (ptr->vm_private_data == handle) {
|
||||
rw_wunlock(&linux_vma_lock);
|
||||
kfree(vmap);
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry);
|
||||
rw_wunlock(&linux_vma_lock);
|
||||
return (vmap);
|
||||
}
|
||||
|
||||
static void
|
||||
linux_cdev_handle_remove(struct vm_area_struct *vmap)
|
||||
{
|
||||
if (vmap == NULL)
|
||||
return;
|
||||
|
||||
rw_wlock(&linux_vma_lock);
|
||||
TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry);
|
||||
rw_wunlock(&linux_vma_lock);
|
||||
kfree(vmap);
|
||||
}
|
||||
|
||||
static struct vm_area_struct *
|
||||
linux_cdev_handle_find(void *handle)
|
||||
{
|
||||
struct vm_area_struct *vmap;
|
||||
|
||||
rw_rlock(&linux_vma_lock);
|
||||
TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) {
|
||||
if (vmap->vm_private_data == handle)
|
||||
break;
|
||||
}
|
||||
rw_runlock(&linux_vma_lock);
|
||||
return (vmap);
|
||||
}
|
||||
|
||||
static int
|
||||
linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
|
||||
vm_ooffset_t foff, struct ucred *cred, u_short *color)
|
||||
{
|
||||
const struct vm_operations_struct *vm_ops;
|
||||
struct vm_area_struct *vmap;
|
||||
|
||||
vmap = linux_cdev_handle_find(handle);
|
||||
MPASS(vmap != NULL);
|
||||
|
||||
*color = 0;
|
||||
|
||||
down_write(&vmap->vm_mm->mmap_sem);
|
||||
vm_ops = vmap->vm_ops;
|
||||
if (likely(vm_ops != NULL))
|
||||
vm_ops->open(vmap);
|
||||
up_write(&vmap->vm_mm->mmap_sem);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
linux_cdev_pager_dtor(void *handle)
|
||||
{
|
||||
const struct vm_operations_struct *vm_ops;
|
||||
struct vm_area_struct *vmap;
|
||||
|
||||
vmap = linux_cdev_handle_find(handle);
|
||||
MPASS(vmap != NULL);
|
||||
|
||||
down_write(&vmap->vm_mm->mmap_sem);
|
||||
vm_ops = vmap->vm_ops;
|
||||
if (likely(vm_ops != NULL))
|
||||
vm_ops->close(vmap);
|
||||
up_write(&vmap->vm_mm->mmap_sem);
|
||||
|
||||
linux_cdev_handle_remove(vmap);
|
||||
}
|
||||
|
||||
/*
 * Character device pager operations used for LinuxKPI memory maps
 * where the driver supplied a vm_operations_struct.
 */
static struct cdev_pager_ops linux_cdev_pager_ops = {
	.cdev_pg_populate = linux_cdev_pager_populate, /* page faults */
	.cdev_pg_ctor = linux_cdev_pager_ctor,	/* object creation */
	.cdev_pg_dtor = linux_cdev_pager_dtor	/* last unmap */
};
|
||||
|
||||
static int
|
||||
linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
|
||||
{
|
||||
@ -707,10 +869,11 @@ static int
|
||||
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
|
||||
vm_size_t size, struct vm_object **object, int nprot)
|
||||
{
|
||||
struct vm_area_struct *vmap;
|
||||
struct linux_file *filp;
|
||||
struct thread *td;
|
||||
struct file *file;
|
||||
struct vm_area_struct vma;
|
||||
vm_memattr_t attr;
|
||||
int error;
|
||||
|
||||
td = curthread;
|
||||
@ -720,39 +883,82 @@ linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
|
||||
if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
|
||||
return (error);
|
||||
filp->f_flags = file->f_flag;
|
||||
linux_set_current(td);
|
||||
vma.vm_start = 0;
|
||||
vma.vm_end = size;
|
||||
vma.vm_pgoff = *offset / PAGE_SIZE;
|
||||
vma.vm_pfn = 0;
|
||||
vma.vm_page_prot = VM_MEMATTR_DEFAULT;
|
||||
if (filp->f_op->mmap) {
|
||||
error = -filp->f_op->mmap(filp, &vma);
|
||||
if (error == 0) {
|
||||
struct sglist *sg;
|
||||
|
||||
sg = sglist_alloc(1, M_WAITOK);
|
||||
sglist_append_phys(sg,
|
||||
(vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
|
||||
*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
|
||||
nprot, 0, td->td_ucred);
|
||||
if (*object == NULL) {
|
||||
sglist_free(sg);
|
||||
error = EINVAL;
|
||||
goto done;
|
||||
}
|
||||
*offset = 0;
|
||||
if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
|
||||
VM_OBJECT_WLOCK(*object);
|
||||
vm_object_set_memattr(*object,
|
||||
vma.vm_page_prot);
|
||||
VM_OBJECT_WUNLOCK(*object);
|
||||
}
|
||||
if (filp->f_op->mmap == NULL)
|
||||
return (ENODEV);
|
||||
|
||||
linux_set_current(td);
|
||||
|
||||
vmap = kzalloc(sizeof(*vmap), GFP_KERNEL);
|
||||
vmap->vm_start = 0;
|
||||
vmap->vm_end = size;
|
||||
vmap->vm_pgoff = *offset / PAGE_SIZE;
|
||||
vmap->vm_pfn = 0;
|
||||
vmap->vm_flags = vmap->vm_page_prot = nprot;
|
||||
vmap->vm_ops = NULL;
|
||||
vmap->vm_file = filp;
|
||||
vmap->vm_mm = current->mm;
|
||||
|
||||
if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
|
||||
error = EINTR;
|
||||
} else {
|
||||
error = -filp->f_op->mmap(filp, vmap);
|
||||
up_write(&vmap->vm_mm->mmap_sem);
|
||||
}
|
||||
|
||||
if (error != 0) {
|
||||
kfree(vmap);
|
||||
return (error);
|
||||
}
|
||||
|
||||
attr = pgprot2cachemode(vmap->vm_page_prot);
|
||||
|
||||
if (vmap->vm_ops != NULL) {
|
||||
void *vm_private_data;
|
||||
|
||||
if (vmap->vm_ops->fault == NULL ||
|
||||
vmap->vm_ops->open == NULL ||
|
||||
vmap->vm_ops->close == NULL ||
|
||||
vmap->vm_private_data == NULL) {
|
||||
kfree(vmap);
|
||||
return (EINVAL);
|
||||
}
|
||||
} else
|
||||
error = ENODEV;
|
||||
done:
|
||||
return (error);
|
||||
|
||||
vm_private_data = vmap->vm_private_data;
|
||||
|
||||
vmap = linux_cdev_handle_insert(vm_private_data, vmap);
|
||||
|
||||
*object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE,
|
||||
&linux_cdev_pager_ops, size, nprot, *offset, curthread->td_ucred);
|
||||
|
||||
if (*object == NULL) {
|
||||
linux_cdev_handle_remove(vmap);
|
||||
return (EINVAL);
|
||||
}
|
||||
} else {
|
||||
struct sglist *sg;
|
||||
|
||||
sg = sglist_alloc(1, M_WAITOK);
|
||||
sglist_append_phys(sg, (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len);
|
||||
|
||||
*object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len,
|
||||
nprot, 0, curthread->td_ucred);
|
||||
|
||||
kfree(vmap);
|
||||
|
||||
if (*object == NULL) {
|
||||
sglist_free(sg);
|
||||
return (EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
if (attr != VM_MEMATTR_DEFAULT) {
|
||||
VM_OBJECT_WLOCK(*object);
|
||||
vm_object_set_memattr(*object, attr);
|
||||
VM_OBJECT_WUNLOCK(*object);
|
||||
}
|
||||
*offset = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct cdevsw linuxcdevsw = {
|
||||
@ -1484,6 +1690,7 @@ linux_compat_init(void *arg)
|
||||
#if defined(__i386__) || defined(__amd64__)
|
||||
linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
|
||||
#endif
|
||||
rw_init(&linux_vma_lock, "lkpi-vma-lock");
|
||||
|
||||
rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
|
||||
OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
|
||||
@ -1514,6 +1721,8 @@ linux_compat_uninit(void *arg)
|
||||
linux_kobject_kfree_name(&linux_class_root);
|
||||
linux_kobject_kfree_name(&linux_root_device.kobj);
|
||||
linux_kobject_kfree_name(&linux_class_misc.kobj);
|
||||
|
||||
rw_destroy(&linux_vma_lock);
|
||||
}
|
||||
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user