Use sf_buf_alloc() instead of vm_map_find() on exec_map to create the

ephemeral mappings that are used as the source for three copy
operations from kernel space to user space.  There are two reasons for
making this change: (1) Under heavy load, exec_map can fill up, causing
vm_map_find() to fail.  When it fails, the nascent process is aborted
(SIGABRT).  In contrast, this reimplementation using sf_buf_alloc()
sleeps instead of failing.  (2) Although it is possible to sleep on vm_map_find()'s
failure until address space becomes available (see kmem_alloc_wait()),
using sf_buf_alloc() is faster.  Furthermore, the reimplementation
uses a CPU-private mapping, avoiding a TLB shootdown on
multiprocessors.

Problem uncovered by: kris@
Reviewed by: tegge@
MFC after: 3 weeks
This commit is contained in:
Alan Cox 2005-12-16 18:34:14 +00:00
parent 6ba9ec2d09
commit da61b9a69e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=153485
3 changed files with 103 additions and 61 deletions

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/sf_buf.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
@ -239,9 +240,9 @@ __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_offset_t start, vm_offset_t end, vm_prot_t prot,
vm_prot_t max)
{
int error, rv;
struct sf_buf *sf;
int error;
vm_offset_t off;
vm_offset_t data_buf = 0;
/*
* Create the page if it doesn't exist yet. Ignore errors.
@ -255,25 +256,13 @@ __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
* Find the page from the underlying object.
*/
if (object) {
vm_object_reference(object);
rv = vm_map_find(exec_map,
object,
trunc_page(offset),
&data_buf,
PAGE_SIZE,
TRUE,
VM_PROT_READ,
VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
return (rv);
}
sf = vm_imgact_map_page(object, offset);
if (sf == NULL)
return (KERN_FAILURE);
off = offset - trunc_page(offset);
error = copyout((caddr_t)data_buf + off, (caddr_t)start,
error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
end - start);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
vm_imgact_unmap_page(sf);
if (error) {
return (KERN_FAILURE);
}
@ -287,7 +276,8 @@ __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_offset_t start, vm_offset_t end, vm_prot_t prot,
vm_prot_t max, int cow)
{
vm_offset_t data_buf, off;
struct sf_buf *sf;
vm_offset_t off;
vm_size_t sz;
int error, rv;
@ -316,35 +306,23 @@ __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
FALSE, prot, max, 0);
if (rv)
return (rv);
data_buf = 0;
while (start < end) {
vm_object_reference(object);
rv = vm_map_find(exec_map,
object,
trunc_page(offset),
&data_buf,
2 * PAGE_SIZE,
TRUE,
VM_PROT_READ,
VM_PROT_ALL,
(MAP_COPY_ON_WRITE
| MAP_PREFAULT_PARTIAL));
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
return (rv);
}
if (object == NULL)
return (KERN_SUCCESS);
for (; start < end; start += sz) {
sf = vm_imgact_map_page(object, offset);
if (sf == NULL)
return (KERN_FAILURE);
off = offset - trunc_page(offset);
sz = end - start;
if (sz > PAGE_SIZE)
sz = PAGE_SIZE;
error = copyout((caddr_t)data_buf + off,
if (sz > PAGE_SIZE - off)
sz = PAGE_SIZE - off;
error = copyout((caddr_t)sf_buf_kva(sf) + off,
(caddr_t)start, sz);
vm_map_remove(exec_map, data_buf,
data_buf + 2 * PAGE_SIZE);
vm_imgact_unmap_page(sf);
if (error) {
return (KERN_FAILURE);
}
start += sz;
offset += sz;
}
rv = KERN_SUCCESS;
} else {
@ -365,12 +343,12 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
size_t pagesize)
{
struct sf_buf *sf;
size_t map_len;
vm_offset_t map_addr;
int error, rv, cow;
size_t copy_len;
vm_offset_t file_addr;
vm_offset_t data_buf = 0;
error = 0;
@ -455,27 +433,17 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
if (copy_len != 0) {
vm_offset_t off;
vm_object_reference(object);
rv = vm_map_find(exec_map,
object,
trunc_page(offset + filsz),
&data_buf,
PAGE_SIZE,
TRUE,
VM_PROT_READ,
VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
return (EINVAL);
}
sf = vm_imgact_map_page(object, offset + filsz);
if (sf == NULL)
return (EIO);
/* send the page fragment to user space */
off = trunc_page_ps(offset + filsz, pagesize) -
trunc_page(offset + filsz);
error = copyout((caddr_t)data_buf + off, (caddr_t)map_addr,
copy_len);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
error = copyout((caddr_t)sf_buf_kva(sf) + off,
(caddr_t)map_addr, copy_len);
vm_imgact_unmap_page(sf);
if (error) {
return (error);
}

View File

@ -86,6 +86,8 @@ void vsunlock(void *, size_t);
void vm_object_print(/* db_expr_t */ long, boolean_t, /* db_expr_t */ long,
char *);
int vm_fault_quick(caddr_t v, int prot);
struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
void vm_thread_dispose_altkstack(struct thread *td);
void vm_thread_new(struct thread *td, int pages);

View File

@ -70,6 +70,8 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sf_buf.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
@ -239,6 +241,76 @@ vsunlock(void *addr, size_t len)
VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
}
/*
 * Look up (or create) the page of "object" that backs byte "offset" and
 * take a hold on it.  A page that is not fully valid is first read in
 * through the object's pager.
 *
 * Returns the held page, or NULL if the page vanished during pager I/O or
 * the pager failed to produce valid contents.  The object lock is acquired
 * and released inside this function.
 */
static vm_page_t
vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
{
	vm_page_t page, pagev[1];
	vm_pindex_t idx;
	int status;

	idx = OFF_TO_IDX(offset);
	VM_OBJECT_LOCK(object);
	page = vm_page_grab(object, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if ((page->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
		/* Not every part of the page is valid: ask the pager for it. */
		pagev[0] = page;
		status = vm_pager_get_pages(object, pagev, 1, 0);

		/*
		 * The page is looked up again rather than trusting the old
		 * pointer.  NOTE(review): presumably the pager may free or
		 * replace the page while it runs -- confirm.
		 */
		page = vm_page_lookup(object, idx);
		if (page != NULL &&
		    (page->valid == 0 || status != VM_PAGER_OK)) {
			/* Pager failure: discard the page and report NULL. */
			vm_page_lock_queues();
			vm_page_free(page);
			vm_page_unlock_queues();
			page = NULL;
		}
	}
	if (page != NULL) {
		/*
		 * Hold the page for the caller, then wake it up.
		 * NOTE(review): vm_page_wakeup() presumably releases the busy
		 * state left by vm_page_grab() -- confirm.
		 */
		vm_page_lock_queues();
		vm_page_hold(page);
		vm_page_wakeup(page);
		vm_page_unlock_queues();
	}
	VM_OBJECT_UNLOCK(object);
	return (page);
}
/*
 * Hold the page of "object" that backs byte "offset" and map it with
 * sf_buf_alloc() using SFB_CPUPRIVATE.  sched_pin() is called before the
 * mapping is created; vm_imgact_unmap_page() issues the matching
 * sched_unpin().
 *
 * Returns NULL, without calling sched_pin(), if the page could not be held.
 * Otherwise returns whatever sf_buf_alloc() returns.
 */
struct sf_buf *
vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset)
{
	struct sf_buf *mapping;
	vm_page_t page;

	page = vm_imgact_hold_page(object, offset);
	if (page != NULL) {
		sched_pin();
		mapping = sf_buf_alloc(page, SFB_CPUPRIVATE);
	} else {
		mapping = NULL;
	}
	return (mapping);
}
/*
 * Undo vm_imgact_map_page(): free the sf_buf, call sched_unpin(), and drop
 * the hold on the page that the sf_buf mapped.
 */
void
vm_imgact_unmap_page(struct sf_buf *sf)
{
	vm_page_t page;

	/* Fetch the page first; "sf" is not used again once it is freed. */
	page = sf_buf_page(sf);
	sf_buf_free(sf);
	sched_unpin();

	/* Release the hold under the page queues lock. */
	vm_page_lock_queues();
	vm_page_unhold(page);
	vm_page_unlock_queues();
}
#ifndef KSTACK_MAX_PAGES
#define KSTACK_MAX_PAGES 32
#endif