o) Use the direct map where possible for uiomove_fromphys, based on code from
   sparc64 (sketched below).
o) Use uiomove_fromphys rather than the broken fpage mechanism for /dev/mem.
o) Update sf_buf allocator to not share buffers and to do a pmap_qremove when
   done with an sf_buf so as to better track valid mappings.
Juli Mallett 2010-04-16 23:48:28 +00:00
parent d0985cfb41
commit 11484eb34f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=206714
4 changed files with 150 additions and 212 deletions
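Before the diffs, a condensed sketch of the direct-map fast path that the
first item introduces, distilled from the uio_machdep.c hunks below. The
helper name copy_page_fragment is hypothetical; the KSEG0 macros and the
sf_buf calls are the ones the diff itself uses.

static int
copy_page_fragment(vm_page_t m, vm_offset_t page_offset, size_t cnt,
    struct uio *uio)
{
	struct sf_buf *sf;
	vm_paddr_t pa;
	char *cp;
	int error;

	pa = VM_PAGE_TO_PHYS(m);
	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
		/* Low physical memory is permanently mapped via KSEG0. */
		cp = (char *)MIPS_PHYS_TO_KSEG0(pa) + page_offset;
		sf = NULL;
	} else {
		/* Pages above the direct map still need an sf_buf mapping. */
		sf = sf_buf_alloc(m, 0);
		cp = (char *)sf_buf_kva(sf) + page_offset;
	}
	error = uiomove(cp, (int)cnt, uio);
	if (sf != NULL)
		sf_buf_free(sf);
	return (error);
}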


@@ -1,5 +1,5 @@
/*-
* Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
* Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -23,29 +23,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: src/sys/i386/include/sf_buf.h,v 1.4 2005/02/13 06:23:13 alc
* $FreeBSD$
*/
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <sys/queue.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
struct vm_page;
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#ifdef SMP
cpumask_t cpumask; /* cpus on which mapping is valid */
#endif
};
static __inline vm_offset_t


@@ -1,13 +1,12 @@
/* $OpenBSD: mem.c,v 1.2 1998/08/31 17:42:34 millert Exp $ */
/* $NetBSD: mem.c,v 1.6 1995/04/10 11:55:03 mycroft Exp $ */
/*
/*-
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and Ralph Campbell.
* Science Department, and code derived from software contributed to
* Berkeley by William Jolitz.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -33,161 +32,136 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)mem.c 8.3 (Berkeley) 1/12/94
* JNPR: mem.c,v 1.3 2007/08/09 11:23:32 katta Exp $
*/
/*
* Memory special file
* from: Utah $Hdr: mem.c 1.13 89/10/08$
* from: @(#)mem.c 7.2 (Berkeley) 5/9/91
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Memory special file
*/
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/user.h>
#include <sys/msgbuf.h>
#include <sys/systm.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/sched.h>
#include <sys/malloc.h>
#include <machine/pte.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/atomic.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <machine/memdev.h>
struct mem_range_softc mem_range_softc;
extern struct sysmaps sysmaps_pcpu[];
/*ARGSUSED*/
/* ARGSUSED */
int
memrw(dev, uio, flags)
struct cdev *dev;
struct uio *uio;
int flags;
memrw(struct cdev *dev, struct uio *uio, int flags)
{
register vm_offset_t v;
register int c;
register struct iovec *iov;
struct iovec *iov;
int error = 0;
vm_offset_t va, eva, off, v;
vm_prot_t prot;
struct vm_page m;
vm_page_t marr;
vm_size_t cnt;
while (uio->uio_resid > 0 && error == 0) {
cnt = 0;
error = 0;
GIANT_REQUIRED;
while (uio->uio_resid > 0 && !error) {
iov = uio->uio_iov;
if (iov->iov_len == 0) {
uio->uio_iov++;
uio->uio_iovcnt--;
if (uio->uio_iovcnt < 0)
panic("mmrw");
panic("memrw");
continue;
}
/* minor device 0 is physical memory */
if (dev2unit(dev) == CDEV_MINOR_MEM) {
v = uio->uio_offset;
c = iov->iov_len;
vm_offset_t va;
vm_paddr_t pa;
register int o;
off = uio->uio_offset & PAGE_MASK;
cnt = PAGE_SIZE - ((vm_offset_t)iov->iov_base &
PAGE_MASK);
cnt = min(cnt, PAGE_SIZE - off);
cnt = min(cnt, iov->iov_len);
if (is_cacheable_mem(v) &&
is_cacheable_mem(v + c - 1)) {
struct fpage *fp;
struct sysmaps *sysmaps;
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
mtx_lock(&sysmaps->lock);
sched_pin();
fp = &sysmaps->fp[PMAP_FPAGE1];
pa = uio->uio_offset & ~PAGE_MASK;
va = pmap_map_fpage(pa, fp, FALSE);
o = (int)uio->uio_offset & PAGE_MASK;
c = (u_int)(PAGE_SIZE -
((uintptr_t)iov->iov_base & PAGE_MASK));
c = min(c, (u_int)(PAGE_SIZE - o));
c = min(c, (u_int)iov->iov_len);
error = uiomove((caddr_t)(va + o), (int)c, uio);
pmap_unmap_fpage(pa, fp);
sched_unpin();
mtx_unlock(&sysmaps->lock);
} else
return (EFAULT);
continue;
m.phys_addr = trunc_page(v);
marr = &m;
error = uiomove_fromphys(&marr, off, cnt, uio);
}
/* minor device 1 is kernel memory */
else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
v = uio->uio_offset;
c = min(iov->iov_len, MAXPHYS);
va = uio->uio_offset;
vm_offset_t addr, eaddr;
vm_offset_t wired_tlb_virtmem_end;
va = trunc_page(uio->uio_offset);
eva = round_page(uio->uio_offset
+ iov->iov_len);
/*
* Make sure that all of the pages are currently
* resident so that we don't create any zero-fill pages.
/*
* Make sure that all the pages are currently resident
* so that we don't create any zero-fill pages.
*/
addr = trunc_page(uio->uio_offset);
eaddr = round_page(uio->uio_offset + c);
if (addr > (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
wired_tlb_virtmem_end = VM_MIN_KERNEL_ADDRESS +
VM_KERNEL_ALLOC_OFFSET;
if ((addr < wired_tlb_virtmem_end) &&
(eaddr >= wired_tlb_virtmem_end))
addr = wired_tlb_virtmem_end;
if (addr >= wired_tlb_virtmem_end) {
for (; addr < eaddr; addr += PAGE_SIZE)
if (pmap_extract(kernel_pmap,
addr) == 0)
return EFAULT;
if (!kernacc(
(caddr_t)(uintptr_t)uio->uio_offset, c,
uio->uio_rw == UIO_READ ?
VM_PROT_READ : VM_PROT_WRITE))
return (EFAULT);
}
}
else if (MIPS_IS_KSEG0_ADDR(v)) {
if (MIPS_KSEG0_TO_PHYS(v + c) >= ctob(physmem))
for (; va < eva; va += PAGE_SIZE)
if (pmap_extract(kernel_pmap, va) == 0)
return (EFAULT);
}
else if (MIPS_IS_KSEG1_ADDR(v)) {
if (MIPS_KSEG1_TO_PHYS(v + c) >= ctob(physmem))
return (EFAULT);
}
else
prot = (uio->uio_rw == UIO_READ)
? VM_PROT_READ : VM_PROT_WRITE;
va = uio->uio_offset;
if (kernacc((void *) va, iov->iov_len, prot)
== FALSE)
return (EFAULT);
error = uiomove((void *)va, iov->iov_len, uio);
error = uiomove((caddr_t)v, c, uio);
continue;
}
}
return (error);
}
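For the second item: rather than mapping fpages, memrw() now synthesizes a
one-off vm_page whose only meaningful field is phys_addr and hands it to
uiomove_fromphys(), which picks the direct map or an sf_buf as sketched
earlier. A condensed illustration (the helper name is hypothetical):

static int
mem_read_phys(vm_offset_t v, vm_offset_t off, vm_size_t cnt, struct uio *uio)
{
	struct vm_page m;	/* fake page: only phys_addr is consumed */
	vm_page_t marr;

	m.phys_addr = trunc_page(v);
	marr = &m;
	return (uiomove_fromphys(&marr, off, cnt, uio));
}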
/*ARGSUSED*/
/*
* allow user processes to MMAP some memory sections
* instead of going through read/write
*/
int
memmmap(struct cdev *dev, vm_ooffset_t off, vm_paddr_t *paddr,
memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
int prot, vm_memattr_t *memattr)
{
/*
* /dev/mem is the only one that makes sense through this
* interface. For /dev/kmem any physaddr we return here
* could be transient and hence incorrect or invalid at
* a later time.
*/
if (dev2unit(dev) != CDEV_MINOR_MEM)
return (-1);
return (EOPNOTSUPP);
*paddr = offset;
return (0);
}
void


@@ -32,8 +32,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
* from: src/sys/i386/i386/uio_machdep.c,v 1.8 2005/02/13 23:09:36 alc
* @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
*/
#include <sys/cdefs.h>
@@ -44,17 +43,18 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sf_buf.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
/*
* Implement uiomove(9) from physical memory using sf_bufs to reduce
* the creation and destruction of ephemeral mappings.
* Implement uiomove(9) from physical memory using a combination
* of the direct mapping and sf_bufs to reduce the creation and
* destruction of ephemeral mappings.
*/
int
uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
@@ -64,6 +64,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
struct iovec *iov;
void *cp;
vm_offset_t page_offset;
vm_paddr_t pa;
vm_page_t m;
size_t cnt;
int error = 0;
int save = 0;
@@ -85,10 +87,16 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
if (cnt > n)
cnt = n;
page_offset = offset & PAGE_MASK;
cnt = min(cnt, PAGE_SIZE - page_offset);
sched_pin();
sf = sf_buf_alloc(ma[offset >> PAGE_SHIFT], SFB_CPUPRIVATE);
cp = (char *)sf_buf_kva(sf) + page_offset;
cnt = ulmin(cnt, PAGE_SIZE - page_offset);
m = ma[offset >> PAGE_SHIFT];
pa = VM_PAGE_TO_PHYS(m);
if (pa < MIPS_KSEG0_LARGEST_PHYS) {
cp = (char *)MIPS_PHYS_TO_KSEG0(pa) + page_offset;
sf = NULL;
} else {
sf = sf_buf_alloc(m, 0);
cp = (char *)sf_buf_kva(sf) + page_offset;
}
switch (uio->uio_segflg) {
case UIO_USERSPACE:
if (ticks - PCPU_GET(switchticks) >= hogticks)
@@ -98,8 +106,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
else
error = copyin(iov->iov_base, cp, cnt);
if (error) {
sf_buf_free(sf);
sched_unpin();
if (sf != NULL)
sf_buf_free(sf);
goto out;
}
break;
@@ -112,8 +120,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
case UIO_NOCOPY:
break;
}
sf_buf_free(sf);
sched_unpin();
if (sf != NULL)
sf_buf_free(sf);
iov->iov_base = (char *)iov->iov_base + cnt;
iov->iov_len -= cnt;
uio->uio_resid -= cnt;


@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/asm.h>
#include <machine/cache.h>
#include <machine/clock.h>
#include <machine/cpu.h>
@@ -63,12 +64,15 @@ __FBSDID("$FreeBSD$");
#include <machine/pcb.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <sys/user.h>
#include <sys/mbuf.h>
@@ -81,25 +85,17 @@ __FBSDID("$FreeBSD$");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
LIST_HEAD(sf_head, sf_buf);
/*
* A hash table of active sendfile(2) buffers
* Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, bundling
* the freelist head with the sf_lock mutex.
*/
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;
static struct {
SLIST_HEAD(, sf_buf) sf_head;
struct mtx sf_lock;
} sf_freelist;
#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;
/*
* A lock used to synchronize access to the hash table and free list
*/
static struct mtx sf_buf_lock;
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@@ -471,56 +467,34 @@ sf_buf_init(void *arg)
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
SLIST_INIT(&sf_freelist.sf_head);
sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
}
sf_buf_alloc_want = 0;
mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}
/*
* Allocate an sf_buf for the given vm_page. On this machine, however, there
* is no sf_buf object. Instead, an opaque pointer to the given vm_page is
* returned.
* Get an sf_buf from the freelist. Will block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
struct sf_head *hash_list;
struct sf_buf *sf;
int error;
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
mtx_lock(&sf_buf_lock);
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
sf->ref_count++;
if (sf->ref_count == 1) {
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
}
/*
* Flush all mappings in order to have up to date
* physical memory
*/
pmap_flush_pvcache(sf->m);
mips_dcache_inv_range(sf->kva, PAGE_SIZE);
goto done;
}
}
while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
mtx_lock(&sf_freelist.sf_lock);
while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
if (flags & SFB_NOWAIT)
goto done;
break;
sf_buf_alloc_want++;
mbstat.sf_allocwait++;
error = msleep(&sf_buf_freelist, &sf_buf_lock,
error = msleep(&sf_freelist, &sf_freelist.sf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
@@ -528,42 +502,33 @@ sf_buf_alloc(struct vm_page *m, int flags)
* If we got a signal, don't risk going back to sleep.
*/
if (error)
goto done;
break;
}
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
if (sf->m != NULL)
LIST_REMOVE(sf, list_entry);
LIST_INSERT_HEAD(hash_list, sf, list_entry);
sf->ref_count = 1;
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
pmap_qenter(sf->kva, &sf->m, 1);
done:
mtx_unlock(&sf_buf_lock);
if (sf != NULL) {
SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
pmap_qenter(sf->kva, &sf->m, 1);
}
mtx_unlock(&sf_freelist.sf_lock);
return (sf);
}
/*
* Free the sf_buf. In fact, do nothing because there are no resources
* associated with the sf_buf.
* Release resources back to the system.
*/
void
sf_buf_free(struct sf_buf *sf)
{
mtx_lock(&sf_buf_lock);
sf->ref_count--;
/*
* Make sure all changes in KVA end up in physical memory
*/
mips_dcache_wbinv_range(sf->kva, PAGE_SIZE);
if (sf->ref_count == 0) {
TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
nsfbufsused--;
if (sf_buf_alloc_want > 0)
wakeup_one(&sf_buf_freelist);
}
mtx_unlock(&sf_buf_lock);
pmap_qremove(sf->kva, 1);
mtx_lock(&sf_freelist.sf_lock);
SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
nsfbufsused--;
if (sf_buf_alloc_want > 0)
wakeup_one(&sf_freelist);
mtx_unlock(&sf_freelist.sf_lock);
}
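The pmap_qenter()/pmap_qremove() pairing above is the substance of the third
item: a buffer's KVA is valid only between sf_buf_alloc() and sf_buf_free(),
so no stale mapping lingers on the freelist. A hypothetical caller (the
function name is illustrative, not part of this commit):

static void
zero_page_via_sf_buf(vm_page_t m)
{
	struct sf_buf *sf;

	/* May sleep until a buffer is free; alloc runs pmap_qenter(). */
	sf = sf_buf_alloc(m, 0);
	bzero((void *)sf_buf_kva(sf), PAGE_SIZE);
	/* free runs pmap_qremove(), unmapping before the slot is reused. */
	sf_buf_free(sf);
}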
/*