5975e53d40
Suppose that we have an exclusively busy page, and a thread which can accept shared-busy page. In this case, typical code waiting for the page xbusy state to pass is again: VM_OBJECT_WLOCK(object); ... if (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(object); <---1 vm_page_busy_sleep(p, "vmopax"); goto again; } Suppose that the xbusy state owner locked the object, unbusied the page and unlocked the object after we are at the line [1], but before we executed the load of the busy_lock word in vm_page_busy_sleep(). If it happens that there is still no waiters recorded for the busy state, the xbusy owner did not acquired the page lock, so it proceeded. More, suppose that some other thread happen to share-busy the page after xbusy state was relinquished but before the m->busy_lock is read in vm_page_busy_sleep(). Again, that thread only needs vm_object lock to proceed. Then, vm_page_busy_sleep() reads busy_lock value equal to the VPB_SHARERS_WORD(1). In this case, all tests in vm_page_busy_sleep(9) pass and we are going to sleep, despite the page being share-busied. Update check for m->busy_lock == VPB_UNBUSIED in vm_page_busy_sleep(9) to also accept shared-busy state if we only wait for the xbusy state to pass. Merge sequential if()s with the same 'then' clause in vm_page_busy_sleep(). Note that the current code does not share-busy pages from parallel threads, the only way to have more that one sbusy owner is right now is to recurse. Reported and tested by: pho (previous version) Reviewed by: alc, markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D8196
564 lines
13 KiB
C
564 lines
13 KiB
C
/**************************************************************************
|
|
*
|
|
* Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
**************************************************************************/
|
|
/*
|
|
* Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
|
|
*/
|
|
/*
|
|
* Copyright (c) 2013 The FreeBSD Foundation
|
|
* All rights reserved.
|
|
*
|
|
* Portions of this software were developed by Konstantin Belousov
|
|
* <kib@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include "opt_vm.h"
|
|
|
|
#include <dev/drm2/drmP.h>
|
|
#include <dev/drm2/ttm/ttm_module.h>
|
|
#include <dev/drm2/ttm/ttm_bo_driver.h>
|
|
#include <dev/drm2/ttm/ttm_placement.h>
|
|
|
|
#include <vm/vm.h>
|
|
#include <vm/vm_page.h>
|
|
#include <vm/vm_pageout.h>
|
|
|
|
#define TTM_BO_VM_NUM_PREFAULT 16
|
|
|
|
RB_GENERATE(ttm_bo_device_buffer_objects, ttm_buffer_object, vm_rb,
|
|
ttm_bo_cmp_rb_tree_items);
|
|
|
|
int
|
|
ttm_bo_cmp_rb_tree_items(struct ttm_buffer_object *a,
|
|
struct ttm_buffer_object *b)
|
|
{
|
|
|
|
if (a->vm_node->start < b->vm_node->start) {
|
|
return (-1);
|
|
} else if (a->vm_node->start > b->vm_node->start) {
|
|
return (1);
|
|
} else {
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
static struct ttm_buffer_object *ttm_bo_vm_lookup_rb(struct ttm_bo_device *bdev,
|
|
unsigned long page_start,
|
|
unsigned long num_pages)
|
|
{
|
|
unsigned long cur_offset;
|
|
struct ttm_buffer_object *bo;
|
|
struct ttm_buffer_object *best_bo = NULL;
|
|
|
|
bo = RB_ROOT(&bdev->addr_space_rb);
|
|
while (bo != NULL) {
|
|
cur_offset = bo->vm_node->start;
|
|
if (page_start >= cur_offset) {
|
|
best_bo = bo;
|
|
if (page_start == cur_offset)
|
|
break;
|
|
bo = RB_RIGHT(bo, vm_rb);
|
|
} else
|
|
bo = RB_LEFT(bo, vm_rb);
|
|
}
|
|
|
|
if (unlikely(best_bo == NULL))
|
|
return NULL;
|
|
|
|
if (unlikely((best_bo->vm_node->start + best_bo->num_pages) <
|
|
(page_start + num_pages)))
|
|
return NULL;
|
|
|
|
return best_bo;
|
|
}
|
|
|
|
static int
|
|
ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
|
|
int prot, vm_page_t *mres)
|
|
{
|
|
|
|
struct ttm_buffer_object *bo = vm_obj->handle;
|
|
struct ttm_bo_device *bdev = bo->bdev;
|
|
struct ttm_tt *ttm = NULL;
|
|
vm_page_t m, m1;
|
|
int ret;
|
|
int retval = VM_PAGER_OK;
|
|
struct ttm_mem_type_manager *man =
|
|
&bdev->man[bo->mem.mem_type];
|
|
|
|
vm_object_pip_add(vm_obj, 1);
|
|
if (*mres != NULL) {
|
|
vm_page_lock(*mres);
|
|
vm_page_remove(*mres);
|
|
vm_page_unlock(*mres);
|
|
}
|
|
retry:
|
|
VM_OBJECT_WUNLOCK(vm_obj);
|
|
m = NULL;
|
|
|
|
reserve:
|
|
ret = ttm_bo_reserve(bo, false, false, false, 0);
|
|
if (unlikely(ret != 0)) {
|
|
if (ret == -EBUSY) {
|
|
kern_yield(0);
|
|
goto reserve;
|
|
}
|
|
}
|
|
|
|
if (bdev->driver->fault_reserve_notify) {
|
|
ret = bdev->driver->fault_reserve_notify(bo);
|
|
switch (ret) {
|
|
case 0:
|
|
break;
|
|
case -EBUSY:
|
|
case -ERESTARTSYS:
|
|
case -EINTR:
|
|
kern_yield(0);
|
|
goto reserve;
|
|
default:
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Wait for buffer data in transit, due to a pipelined
|
|
* move.
|
|
*/
|
|
|
|
mtx_lock(&bdev->fence_lock);
|
|
if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
|
|
/*
|
|
* Here, the behavior differs between Linux and FreeBSD.
|
|
*
|
|
* On Linux, the wait is interruptible (3rd argument to
|
|
* ttm_bo_wait). There must be some mechanism to resume
|
|
* page fault handling, once the signal is processed.
|
|
*
|
|
* On FreeBSD, the wait is uninteruptible. This is not a
|
|
* problem as we can't end up with an unkillable process
|
|
* here, because the wait will eventually time out.
|
|
*
|
|
* An example of this situation is the Xorg process
|
|
* which uses SIGALRM internally. The signal could
|
|
* interrupt the wait, causing the page fault to fail
|
|
* and the process to receive SIGSEGV.
|
|
*/
|
|
ret = ttm_bo_wait(bo, false, false, false);
|
|
mtx_unlock(&bdev->fence_lock);
|
|
if (unlikely(ret != 0)) {
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_unlock;
|
|
}
|
|
} else
|
|
mtx_unlock(&bdev->fence_lock);
|
|
|
|
ret = ttm_mem_io_lock(man, true);
|
|
if (unlikely(ret != 0)) {
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_unlock;
|
|
}
|
|
ret = ttm_mem_io_reserve_vm(bo);
|
|
if (unlikely(ret != 0)) {
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_io_unlock;
|
|
}
|
|
|
|
/*
|
|
* Strictly, we're not allowed to modify vma->vm_page_prot here,
|
|
* since the mmap_sem is only held in read mode. However, we
|
|
* modify only the caching bits of vma->vm_page_prot and
|
|
* consider those bits protected by
|
|
* the bo->mutex, as we should be the only writers.
|
|
* There shouldn't really be any readers of these bits except
|
|
* within vm_insert_mixed()? fork?
|
|
*
|
|
* TODO: Add a list of vmas to the bo, and change the
|
|
* vma->vm_page_prot when the object changes caching policy, with
|
|
* the correct locks held.
|
|
*/
|
|
if (!bo->mem.bus.is_iomem) {
|
|
/* Allocate all page at once, most common usage */
|
|
ttm = bo->ttm;
|
|
if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_io_unlock;
|
|
}
|
|
}
|
|
|
|
if (bo->mem.bus.is_iomem) {
|
|
m = PHYS_TO_VM_PAGE(bo->mem.bus.base + bo->mem.bus.offset +
|
|
offset);
|
|
KASSERT((m->flags & PG_FICTITIOUS) != 0,
|
|
("physical address %#jx not fictitious",
|
|
(uintmax_t)(bo->mem.bus.base + bo->mem.bus.offset
|
|
+ offset)));
|
|
pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement));
|
|
} else {
|
|
ttm = bo->ttm;
|
|
m = ttm->pages[OFF_TO_IDX(offset)];
|
|
if (unlikely(!m)) {
|
|
retval = VM_PAGER_ERROR;
|
|
goto out_io_unlock;
|
|
}
|
|
pmap_page_set_memattr(m,
|
|
(bo->mem.placement & TTM_PL_FLAG_CACHED) ?
|
|
VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement));
|
|
}
|
|
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
if (vm_page_busied(m)) {
|
|
vm_page_lock(m);
|
|
VM_OBJECT_WUNLOCK(vm_obj);
|
|
vm_page_busy_sleep(m, "ttmpbs", false);
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
ttm_mem_io_unlock(man);
|
|
ttm_bo_unreserve(bo);
|
|
goto retry;
|
|
}
|
|
m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
|
|
if (m1 == NULL) {
|
|
if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
|
|
VM_OBJECT_WUNLOCK(vm_obj);
|
|
VM_WAIT;
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
ttm_mem_io_unlock(man);
|
|
ttm_bo_unreserve(bo);
|
|
goto retry;
|
|
}
|
|
} else {
|
|
KASSERT(m == m1,
|
|
("inconsistent insert bo %p m %p m1 %p offset %jx",
|
|
bo, m, m1, (uintmax_t)offset));
|
|
}
|
|
m->valid = VM_PAGE_BITS_ALL;
|
|
vm_page_xbusy(m);
|
|
if (*mres != NULL) {
|
|
KASSERT(*mres != m, ("losing %p %p", *mres, m));
|
|
vm_page_lock(*mres);
|
|
vm_page_free(*mres);
|
|
vm_page_unlock(*mres);
|
|
}
|
|
*mres = m;
|
|
|
|
out_io_unlock1:
|
|
ttm_mem_io_unlock(man);
|
|
out_unlock1:
|
|
ttm_bo_unreserve(bo);
|
|
vm_object_pip_wakeup(vm_obj);
|
|
return (retval);
|
|
|
|
out_io_unlock:
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
goto out_io_unlock1;
|
|
|
|
out_unlock:
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
goto out_unlock1;
|
|
}
|
|
|
|
static int
|
|
ttm_bo_vm_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
|
|
vm_ooffset_t foff, struct ucred *cred, u_short *color)
|
|
{
|
|
|
|
/*
|
|
* On Linux, a reference to the buffer object is acquired here.
|
|
* The reason is that this function is not called when the
|
|
* mmap() is initialized, but only when a process forks for
|
|
* instance. Therefore on Linux, the reference on the bo is
|
|
* acquired either in ttm_bo_mmap() or ttm_bo_vm_open(). It's
|
|
* then released in ttm_bo_vm_close().
|
|
*
|
|
* Here, this function is called during mmap() initialization.
|
|
* Thus, the reference acquired in ttm_bo_mmap_single() is
|
|
* sufficient.
|
|
*/
|
|
|
|
*color = 0;
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
ttm_bo_vm_dtor(void *handle)
|
|
{
|
|
struct ttm_buffer_object *bo = handle;
|
|
|
|
ttm_bo_unref(&bo);
|
|
}
|
|
|
|
static struct cdev_pager_ops ttm_pager_ops = {
|
|
.cdev_pg_fault = ttm_bo_vm_fault,
|
|
.cdev_pg_ctor = ttm_bo_vm_ctor,
|
|
.cdev_pg_dtor = ttm_bo_vm_dtor
|
|
};
|
|
|
|
int
|
|
ttm_bo_mmap_single(struct ttm_bo_device *bdev, vm_ooffset_t *offset, vm_size_t size,
|
|
struct vm_object **obj_res, int nprot)
|
|
{
|
|
struct ttm_bo_driver *driver;
|
|
struct ttm_buffer_object *bo;
|
|
struct vm_object *vm_obj;
|
|
int ret;
|
|
|
|
rw_wlock(&bdev->vm_lock);
|
|
bo = ttm_bo_vm_lookup_rb(bdev, OFF_TO_IDX(*offset), OFF_TO_IDX(size));
|
|
if (likely(bo != NULL))
|
|
refcount_acquire(&bo->kref);
|
|
rw_wunlock(&bdev->vm_lock);
|
|
|
|
if (unlikely(bo == NULL)) {
|
|
printf("[TTM] Could not find buffer object to map\n");
|
|
return (-EINVAL);
|
|
}
|
|
|
|
driver = bo->bdev->driver;
|
|
if (unlikely(!driver->verify_access)) {
|
|
ret = -EPERM;
|
|
goto out_unref;
|
|
}
|
|
ret = driver->verify_access(bo);
|
|
if (unlikely(ret != 0))
|
|
goto out_unref;
|
|
|
|
vm_obj = cdev_pager_allocate(bo, OBJT_MGTDEVICE, &ttm_pager_ops,
|
|
size, nprot, 0, curthread->td_ucred);
|
|
if (vm_obj == NULL) {
|
|
ret = -EINVAL;
|
|
goto out_unref;
|
|
}
|
|
/*
|
|
* Note: We're transferring the bo reference to vm_obj->handle here.
|
|
*/
|
|
*offset = 0;
|
|
*obj_res = vm_obj;
|
|
return 0;
|
|
out_unref:
|
|
ttm_bo_unref(&bo);
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
ttm_bo_release_mmap(struct ttm_buffer_object *bo)
|
|
{
|
|
vm_object_t vm_obj;
|
|
vm_page_t m;
|
|
int i;
|
|
|
|
vm_obj = cdev_pager_lookup(bo);
|
|
if (vm_obj == NULL)
|
|
return;
|
|
|
|
VM_OBJECT_WLOCK(vm_obj);
|
|
retry:
|
|
for (i = 0; i < bo->num_pages; i++) {
|
|
m = vm_page_lookup(vm_obj, i);
|
|
if (m == NULL)
|
|
continue;
|
|
if (vm_page_sleep_if_busy(m, "ttm_unm"))
|
|
goto retry;
|
|
cdev_pager_free_page(vm_obj, m);
|
|
}
|
|
VM_OBJECT_WUNLOCK(vm_obj);
|
|
|
|
vm_object_deallocate(vm_obj);
|
|
}
|
|
|
|
#if 0
|
|
int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
|
|
{
|
|
if (vma->vm_pgoff != 0)
|
|
return -EACCES;
|
|
|
|
vma->vm_ops = &ttm_bo_vm_ops;
|
|
vma->vm_private_data = ttm_bo_reference(bo);
|
|
vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
|
|
return 0;
|
|
}
|
|
|
|
ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
|
|
const char __user *wbuf, char __user *rbuf, size_t count,
|
|
loff_t *f_pos, bool write)
|
|
{
|
|
struct ttm_buffer_object *bo;
|
|
struct ttm_bo_driver *driver;
|
|
struct ttm_bo_kmap_obj map;
|
|
unsigned long dev_offset = (*f_pos >> PAGE_SHIFT);
|
|
unsigned long kmap_offset;
|
|
unsigned long kmap_end;
|
|
unsigned long kmap_num;
|
|
size_t io_size;
|
|
unsigned int page_offset;
|
|
char *virtual;
|
|
int ret;
|
|
bool no_wait = false;
|
|
bool dummy;
|
|
|
|
read_lock(&bdev->vm_lock);
|
|
bo = ttm_bo_vm_lookup_rb(bdev, dev_offset, 1);
|
|
if (likely(bo != NULL))
|
|
ttm_bo_reference(bo);
|
|
read_unlock(&bdev->vm_lock);
|
|
|
|
if (unlikely(bo == NULL))
|
|
return -EFAULT;
|
|
|
|
driver = bo->bdev->driver;
|
|
if (unlikely(!driver->verify_access)) {
|
|
ret = -EPERM;
|
|
goto out_unref;
|
|
}
|
|
|
|
ret = driver->verify_access(bo, filp);
|
|
if (unlikely(ret != 0))
|
|
goto out_unref;
|
|
|
|
kmap_offset = dev_offset - bo->vm_node->start;
|
|
if (unlikely(kmap_offset >= bo->num_pages)) {
|
|
ret = -EFBIG;
|
|
goto out_unref;
|
|
}
|
|
|
|
page_offset = *f_pos & ~PAGE_MASK;
|
|
io_size = bo->num_pages - kmap_offset;
|
|
io_size = (io_size << PAGE_SHIFT) - page_offset;
|
|
if (count < io_size)
|
|
io_size = count;
|
|
|
|
kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
|
|
kmap_num = kmap_end - kmap_offset + 1;
|
|
|
|
ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
|
|
|
|
switch (ret) {
|
|
case 0:
|
|
break;
|
|
case -EBUSY:
|
|
ret = -EAGAIN;
|
|
goto out_unref;
|
|
default:
|
|
goto out_unref;
|
|
}
|
|
|
|
ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
|
|
if (unlikely(ret != 0)) {
|
|
ttm_bo_unreserve(bo);
|
|
goto out_unref;
|
|
}
|
|
|
|
virtual = ttm_kmap_obj_virtual(&map, &dummy);
|
|
virtual += page_offset;
|
|
|
|
if (write)
|
|
ret = copy_from_user(virtual, wbuf, io_size);
|
|
else
|
|
ret = copy_to_user(rbuf, virtual, io_size);
|
|
|
|
ttm_bo_kunmap(&map);
|
|
ttm_bo_unreserve(bo);
|
|
ttm_bo_unref(&bo);
|
|
|
|
if (unlikely(ret != 0))
|
|
return -EFBIG;
|
|
|
|
*f_pos += io_size;
|
|
|
|
return io_size;
|
|
out_unref:
|
|
ttm_bo_unref(&bo);
|
|
return ret;
|
|
}
|
|
|
|
ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf,
|
|
char __user *rbuf, size_t count, loff_t *f_pos,
|
|
bool write)
|
|
{
|
|
struct ttm_bo_kmap_obj map;
|
|
unsigned long kmap_offset;
|
|
unsigned long kmap_end;
|
|
unsigned long kmap_num;
|
|
size_t io_size;
|
|
unsigned int page_offset;
|
|
char *virtual;
|
|
int ret;
|
|
bool no_wait = false;
|
|
bool dummy;
|
|
|
|
kmap_offset = (*f_pos >> PAGE_SHIFT);
|
|
if (unlikely(kmap_offset >= bo->num_pages))
|
|
return -EFBIG;
|
|
|
|
page_offset = *f_pos & ~PAGE_MASK;
|
|
io_size = bo->num_pages - kmap_offset;
|
|
io_size = (io_size << PAGE_SHIFT) - page_offset;
|
|
if (count < io_size)
|
|
io_size = count;
|
|
|
|
kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
|
|
kmap_num = kmap_end - kmap_offset + 1;
|
|
|
|
ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
|
|
|
|
switch (ret) {
|
|
case 0:
|
|
break;
|
|
case -EBUSY:
|
|
return -EAGAIN;
|
|
default:
|
|
return ret;
|
|
}
|
|
|
|
ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
|
|
if (unlikely(ret != 0)) {
|
|
ttm_bo_unreserve(bo);
|
|
return ret;
|
|
}
|
|
|
|
virtual = ttm_kmap_obj_virtual(&map, &dummy);
|
|
virtual += page_offset;
|
|
|
|
if (write)
|
|
ret = copy_from_user(virtual, wbuf, io_size);
|
|
else
|
|
ret = copy_to_user(rbuf, virtual, io_size);
|
|
|
|
ttm_bo_kunmap(&map);
|
|
ttm_bo_unreserve(bo);
|
|
ttm_bo_unref(&bo);
|
|
|
|
if (unlikely(ret != 0))
|
|
return ret;
|
|
|
|
*f_pos += io_size;
|
|
|
|
return io_size;
|
|
}
|
|
#endif
|