Merge all MD sf_buf allocators into one MI, residing in kern/subr_sfbuf.c

The MD allocators had much in common; however, there were some minor
differences. These differences were all consolidated in the MI allocator,
under ifdefs. The defines from machine/vmparam.h turn on features required
for a particular machine. For details, see the comment in sys/sf_buf.h.

As a result, no MD code is left in sys/*/*/vm_machdep.c. Some arches still
have machine/sf_buf.h, which is usually quite small.

Tested by:	glebius (i386), tuexen (arm32), kevlo (arm32)
Reviewed by:	kib
Sponsored by:	Netflix
Sponsored by:	Nginx, Inc.
This commit is contained in:
Gleb Smirnoff 2014-08-05 09:44:10 +00:00
parent c5377460ea
commit c8d2ffd6a7
24 changed files with 476 additions and 975 deletions

View File

@ -29,42 +29,23 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
/*
* On this machine, the only purpose for which sf_buf is used is to implement
* an opaque pointer required by the machine-independent parts of the kernel.
* That pointer references the vm_page that is "mapped" by the sf_buf. The
* actual mapping is provided by the direct virtual-to-physical mapping.
*/
struct sf_buf;
static inline struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{
return ((struct sf_buf *)m);
}
static inline void
sf_buf_free(struct sf_buf *sf)
{
}
static __inline vm_offset_t
static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
}
static __inline vm_page_t
static inline vm_page_t
sf_buf_page(struct sf_buf *sf)
{
return ((vm_page_t)sf);
}
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/sf_buf.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@ -83,42 +82,6 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct switchframe) == 24);
CTASSERT(sizeof(struct trapframe) == 80);
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
LIST_HEAD(sf_head, sf_buf);
/*
* A hash table of active sendfile(2) buffers
*/
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;
#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;
/*
* A lock used to synchronize access to the hash table and free list
*/
static struct mtx sf_buf_lock;
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@ -184,106 +147,6 @@ cpu_thread_swapout(struct thread *td)
{
}
/*
* Detach mapped page and release resources back to the system.
*/
void
sf_buf_free(struct sf_buf *sf)
{
mtx_lock(&sf_buf_lock);
sf->ref_count--;
if (sf->ref_count == 0) {
TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
nsfbufsused--;
pmap_kremove(sf->kva);
sf->m = NULL;
LIST_REMOVE(sf, list_entry);
if (sf_buf_alloc_want > 0)
wakeup(&sf_buf_freelist);
}
mtx_unlock(&sf_buf_lock);
}
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
}
sf_buf_alloc_want = 0;
mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}
/*
* Get an sf_buf from the freelist. Will block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
struct sf_head *hash_list;
struct sf_buf *sf;
int error;
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
mtx_lock(&sf_buf_lock);
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
sf->ref_count++;
if (sf->ref_count == 1) {
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
}
goto done;
}
}
while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
if (flags & SFB_NOWAIT)
goto done;
sf_buf_alloc_want++;
SFSTAT_INC(sf_allocwait);
error = msleep(&sf_buf_freelist, &sf_buf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
/*
* If we got a signal, don't risk going back to sleep.
*/
if (error)
goto done;
}
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
if (sf->m != NULL)
LIST_REMOVE(sf, list_entry);
LIST_INSERT_HEAD(hash_list, sf, list_entry);
sf->ref_count = 1;
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
done:
mtx_unlock(&sf_buf_lock);
return (sf);
}
void
cpu_set_syscall_retval(struct thread *td, int error)
{

View File

@ -29,33 +29,18 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <sys/queue.h>
struct vm_page;
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
};
static __inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
static inline void
sf_buf_map(struct sf_buf *sf, int flags)
{
return (sf->kva);
pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
}
static __inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
static inline int
sf_buf_unmap(struct sf_buf *sf)
{
return (sf->m);
pmap_kremove(sf->kva);
return (1);
}
struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
void sf_buf_free(struct sf_buf *sf);
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -170,4 +170,7 @@ extern vm_offset_t vm_max_kernel_address;
#define VM_MAX_AUTOTUNE_MAXUSERS 384
#endif
#define SFBUF
#define SFBUF_MAP
#endif /* _MACHINE_VMPARAM_H_ */

View File

@ -77,6 +77,7 @@ font.h optional sc \
clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
kern/subr_busdma_bufalloc.c standard
kern/subr_dummy_vdso_tc.c standard
kern/subr_sfbuf.c standard
libkern/arm/aeabi_unwind.c standard
libkern/arm/divsi3.S standard
libkern/arm/ffs.S standard

View File

@ -520,6 +520,7 @@ isa/vga_isa.c optional vga
kern/kern_clocksource.c standard
kern/imgact_aout.c optional compat_aout
kern/imgact_gzip.c optional gzip
kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/flsll.c standard
libkern/memmove.c standard

View File

@ -51,6 +51,7 @@ mips/mips/vm_machdep.c standard
kern/kern_clocksource.c standard
kern/link_elf_obj.c standard
kern/subr_dummy_vdso_tc.c standard
kern/subr_sfbuf.c optional mips | mipsel | mipsn32
# gcc/clang runtime
libkern/ffsl.c standard

View File

@ -205,6 +205,7 @@ i386/svr4/svr4_machdep.c optional compat_svr4
kern/kern_clocksource.c standard
kern/imgact_aout.c optional compat_aout
kern/imgact_gzip.c optional gzip
kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/flsll.c standard
libkern/memmove.c standard

View File

@ -71,6 +71,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt aim
kern/kern_clocksource.c standard
kern/subr_dummy_vdso_tc.c standard
kern/syscalls.c optional ktr
kern/subr_sfbuf.c standard
libkern/ashldi3.c optional powerpc
libkern/ashrdi3.c optional powerpc
libkern/bcmp.c standard

View File

@ -64,6 +64,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt
kern/kern_clocksource.c standard
kern/subr_dummy_vdso_tc.c standard
kern/syscalls.c optional ktr
kern/subr_sfbuf.c standard
libkern/ffs.c standard
libkern/ffsl.c standard
libkern/fls.c standard

View File

@ -118,38 +118,6 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
LIST_HEAD(sf_head, sf_buf);
/*
* A hash table of active sendfile(2) buffers
*/
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;
#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;
/*
* A lock used to synchronize access to the hash table and free list
*/
static struct mtx sf_buf_lock;
extern int _ucodesel, _udatasel;
/*
@ -749,122 +717,13 @@ cpu_reset_real()
while(1);
}
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
}
sf_buf_alloc_want = 0;
mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}
/*
* Invalidate the cache lines that may belong to the page, if
* (possibly old) mapping of the page by sf buffer exists. Returns
* TRUE when mapping was found and cache invalidated.
*/
boolean_t
sf_buf_invalidate_cache(vm_page_t m)
{
struct sf_head *hash_list;
struct sf_buf *sf;
boolean_t ret;
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
ret = FALSE;
mtx_lock(&sf_buf_lock);
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
/*
* Use pmap_qenter to update the pte for
* existing mapping, in particular, the PAT
* settings are recalculated.
*/
pmap_qenter(sf->kva, &m, 1);
pmap_invalidate_cache_range(sf->kva, sf->kva +
PAGE_SIZE);
ret = TRUE;
break;
}
}
mtx_unlock(&sf_buf_lock);
return (ret);
}
/*
* Get an sf_buf from the freelist. May block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
void
sf_buf_map(struct sf_buf *sf, int flags)
{
pt_entry_t opte, *ptep;
struct sf_head *hash_list;
struct sf_buf *sf;
#ifdef SMP
cpuset_t other_cpus;
u_int cpuid;
#endif
int error;
KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
mtx_lock(&sf_buf_lock);
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
sf->ref_count++;
if (sf->ref_count == 1) {
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
}
#ifdef SMP
goto shootdown;
#else
goto done;
#endif
}
}
while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
if (flags & SFB_NOWAIT)
goto done;
sf_buf_alloc_want++;
SFSTAT_INC(sf_allocwait);
error = msleep(&sf_buf_freelist, &sf_buf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
/*
* If we got a signal, don't risk going back to sleep.
*/
if (error)
goto done;
}
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
if (sf->m != NULL)
LIST_REMOVE(sf, list_entry);
LIST_INSERT_HEAD(hash_list, sf, list_entry);
sf->ref_count = 1;
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/*
* Update the sf_buf's virtual-to-physical mapping, flushing the
@ -876,11 +735,11 @@ sf_buf_alloc(struct vm_page *m, int flags)
ptep = vtopte(sf->kva);
opte = *ptep;
#ifdef XEN
PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
| PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
| PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
#else
*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
pmap_cache_bits(m->md.pat_mode, 0);
*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
#endif
/*
@ -892,7 +751,21 @@ sf_buf_alloc(struct vm_page *m, int flags)
#ifdef SMP
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
CPU_ZERO(&sf->cpumask);
shootdown:
sf_buf_shootdown(sf, flags);
#else
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
pmap_invalidate_page(kernel_pmap, sf->kva);
#endif
}
#ifdef SMP
void
sf_buf_shootdown(struct sf_buf *sf, int flags)
{
cpuset_t other_cpus;
u_int cpuid;
sched_pin();
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &sf->cpumask)) {
@ -909,42 +782,50 @@ shootdown:
}
}
sched_unpin();
#else
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
pmap_invalidate_page(kernel_pmap, sf->kva);
}
#endif
done:
mtx_unlock(&sf_buf_lock);
return (sf);
/*
 * Machine-dependent half of sf_buf_free().
 *
 * The return value tells the caller whether the mapping was torn down:
 * non-zero when the kva no longer maps the page, zero when the mapping
 * has been left in place.
 */
int
sf_buf_unmap(struct sf_buf *sf)
{
	int unmapped = 0;

#ifdef XEN
	/*
	 * Xen doesn't like having dangling R/W mappings, so remove
	 * the mapping right away.
	 */
	pmap_qremove(sf->kva, 1);
	unmapped = 1;
#endif
	return (unmapped);
}
/*
 * Re-enter the mapping of the page held by "sf" and invalidate the
 * cache lines covering its kva.
 */
static void
sf_buf_invalidate(struct sf_buf *sf)
{
	vm_page_t page;

	page = sf->m;

	/*
	 * pmap_qenter() rewrites the pte of the existing mapping; in
	 * particular, the PAT settings are recalculated.
	 */
	pmap_qenter(sf->kva, &page, 1);

	/* Invalidate exactly one page worth of cache lines. */
	pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
}
/*
* Remove a reference from the given sf_buf, adding it to the free
* list when its reference count reaches zero. A freed sf_buf still,
* however, retains its virtual-to-physical mapping until it is
* recycled or reactivated by sf_buf_alloc(9).
* Invalidate the cache lines that may belong to the page, if
* (possibly old) mapping of the page by sf buffer exists. Returns
* TRUE when mapping was found and cache invalidated.
*/
void
sf_buf_free(struct sf_buf *sf)
boolean_t
sf_buf_invalidate_cache(vm_page_t m)
{
mtx_lock(&sf_buf_lock);
sf->ref_count--;
if (sf->ref_count == 0) {
TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
nsfbufsused--;
#ifdef XEN
/*
* Xen doesn't like having dangling R/W mappings
*/
pmap_qremove(sf->kva, 1);
sf->m = NULL;
LIST_REMOVE(sf, list_entry);
#endif
if (sf_buf_alloc_want > 0)
wakeup(&sf_buf_freelist);
}
mtx_unlock(&sf_buf_lock);
return (sf_buf_process_page(m, sf_buf_invalidate));
}
/*

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -29,39 +29,8 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <sys/_cpuset.h>
#include <sys/queue.h>
struct vm_page;
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#ifdef SMP
cpuset_t cpumask; /* cpus on which mapping is valid */
#endif
};
struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
void sf_buf_free(struct sf_buf *sf);
static __inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
return (sf->kva);
}
static __inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
return (sf->m);
}
boolean_t sf_buf_invalidate_cache(vm_page_t m);
void sf_buf_map(struct sf_buf *, int);
int sf_buf_unmap(struct sf_buf *);
boolean_t sf_buf_invalidate_cache(vm_page_t);
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -198,4 +198,9 @@
#define VM_MAX_AUTOTUNE_MAXUSERS 384
#endif
#define SFBUF
#define SFBUF_MAP
#define SFBUF_CPUSET
#define SFBUF_PROCESS_PAGE
#endif /* _MACHINE_VMPARAM_H_ */

226
sys/kern/subr_sfbuf.c Normal file
View File

@ -0,0 +1,226 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
LIST_HEAD(sf_head, sf_buf);
/*
* A hash table of active sendfile(2) buffers
*/
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;
#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;
/*
* A lock used to synchronize access to the hash table and free list
*/
static struct mtx sf_buf_lock;
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
if (SFBUF_OPTIONAL_DIRECT_MAP)
return;
#endif
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
KASSERT(sf_bufs, ("%s: malloc failure", __func__));
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
}
sf_buf_alloc_want = 0;
mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}
/*
* Get an sf_buf from the freelist. May block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
struct sf_head *hash_list;
struct sf_buf *sf;
int error;
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
if (SFBUF_OPTIONAL_DIRECT_MAP)
return ((struct sf_buf *)m);
#endif
KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
mtx_lock(&sf_buf_lock);
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
sf->ref_count++;
if (sf->ref_count == 1) {
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
}
#if defined(SMP) && defined(SFBUF_CPUSET)
sf_buf_shootdown(sf, flags);
#endif
goto done;
}
}
while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
if (flags & SFB_NOWAIT)
goto done;
sf_buf_alloc_want++;
SFSTAT_INC(sf_allocwait);
error = msleep(&sf_buf_freelist, &sf_buf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
/*
* If we got a signal, don't risk going back to sleep.
*/
if (error)
goto done;
}
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
if (sf->m != NULL)
LIST_REMOVE(sf, list_entry);
LIST_INSERT_HEAD(hash_list, sf, list_entry);
sf->ref_count = 1;
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
sf_buf_map(sf, flags);
done:
mtx_unlock(&sf_buf_lock);
return (sf);
}
/*
 * Drop one reference on the given sf_buf.  When the last reference goes
 * away the buffer is appended to the free list and any thread waiting in
 * sf_buf_alloc() is woken up.
 *
 * Whether the buffer keeps its virtual-to-physical mapping is decided by
 * sf_buf_unmap(): if it returns non-zero the buffer is unbound from its
 * page and removed from the active hash; otherwise the binding is kept
 * until the buffer is recycled or reactivated by sf_buf_alloc(9).
 *
 * Does nothing when SFBUF_OPTIONAL_DIRECT_MAP is defined and true.
 */
void
sf_buf_free(struct sf_buf *sf)
{

#ifdef SFBUF_OPTIONAL_DIRECT_MAP
	if (SFBUF_OPTIONAL_DIRECT_MAP)
		return;
#endif

	mtx_lock(&sf_buf_lock);
	if (--sf->ref_count != 0) {
		/* Still referenced elsewhere; nothing more to do. */
		mtx_unlock(&sf_buf_lock);
		return;
	}

	nsfbufsused--;
	TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
	if (sf_buf_unmap(sf)) {
		/* Mapping is gone, so the buffer must not be found by page. */
		LIST_REMOVE(sf, list_entry);
		sf->m = NULL;
	}
	if (sf_buf_alloc_want > 0)
		wakeup(&sf_buf_freelist);
	mtx_unlock(&sf_buf_lock);
}
#ifdef SFBUF_PROCESS_PAGE
/*
 * Look up the sf_buf bound to page "m" in the active hash and, if one
 * exists, invoke "cb" on it.  The callback runs with sf_buf_lock held.
 *
 * Returns TRUE when a matching sf_buf was found (and the callback was
 * run), FALSE otherwise.  Only the first match is processed.
 */
boolean_t
sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *))
{
	struct sf_head *bucket;
	struct sf_buf *sf;
	boolean_t found;

	found = FALSE;
	bucket = &sf_buf_active[SF_BUF_HASH(m)];

	mtx_lock(&sf_buf_lock);
	LIST_FOREACH(sf, bucket, list_entry) {
		if (sf->m != m)
			continue;
		cb(sf);
		found = TRUE;
		break;
	}
	mtx_unlock(&sf_buf_lock);

	return (found);
}
#endif /* SFBUF_PROCESS_PAGE */

View File

@ -29,31 +29,9 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#ifdef __mips_n64
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#else
#include <sys/queue.h>
#endif
#ifdef __mips_n64 /* In 64 bit the whole memory is directly mapped */
#ifdef __mips_n64
/* In 64 bit the whole memory is directly mapped */
struct sf_buf;
static inline struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{
return ((struct sf_buf *)m);
}
static inline void
sf_buf_free(struct sf_buf *sf)
{
}
static __inline vm_offset_t
static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
vm_page_t m;
@ -62,38 +40,12 @@ sf_buf_kva(struct sf_buf *sf)
return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)));
}
static __inline struct vm_page *
static inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
return ((vm_page_t)sf);
}
#else /* ! __mips_n64 */
struct vm_page;
struct sf_buf {
SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
};
struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
void sf_buf_free(struct sf_buf *sf);
static __inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
return (sf->kva);
}
static __inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
return (sf->m);
}
#endif /* __mips_n64 */
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -187,4 +187,8 @@
#define ZERO_REGION_SIZE (64 * 1024) /* 64KB */
#ifndef __mips_n64
#define SFBUF
#endif
#endif /* !_MACHINE_VMPARAM_H_ */

View File

@ -76,9 +76,6 @@ __FBSDID("$FreeBSD$");
#include <sys/user.h>
#include <sys/mbuf.h>
#ifndef __mips_n64
#include <sys/sf_buf.h>
#endif
/* Duplicated from asm.h */
#if defined(__mips_o32)
@ -92,38 +89,6 @@ __FBSDID("$FreeBSD$");
#define CALLFRAME_SIZ (SZREG * 4)
#endif
#ifndef __mips_n64
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
/*
* Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
* sf_freelist head with the sf_lock mutex.
*/
static struct {
SLIST_HEAD(, sf_buf) sf_head;
struct mtx sf_lock;
} sf_freelist;
static u_int sf_buf_alloc_want;
#endif /* !__mips_n64 */
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@ -512,84 +477,6 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
#define ZIDLE_LO(v) ((v) * 2 / 3)
#define ZIDLE_HI(v) ((v) * 4 / 5)
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
#ifndef __mips_n64
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
SLIST_INIT(&sf_freelist.sf_head);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
}
sf_buf_alloc_want = 0;
}
/*
* Get an sf_buf from the freelist. Will block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
struct sf_buf *sf;
int error;
mtx_lock(&sf_freelist.sf_lock);
while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
if (flags & SFB_NOWAIT)
break;
sf_buf_alloc_want++;
SFSTAT_INC(sf_allocwait);
error = msleep(&sf_freelist, &sf_freelist.sf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
/*
* If we got a signal, don't risk going back to sleep.
*/
if (error)
break;
}
if (sf != NULL) {
SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
pmap_qenter(sf->kva, &sf->m, 1);
}
mtx_unlock(&sf_freelist.sf_lock);
return (sf);
}
/*
* Release resources back to the system.
*/
void
sf_buf_free(struct sf_buf *sf)
{
pmap_qremove(sf->kva, 1);
mtx_lock(&sf_freelist.sf_lock);
SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
nsfbufsused--;
if (sf_buf_alloc_want > 0)
wakeup(&sf_freelist);
mtx_unlock(&sf_freelist.sf_lock);
}
#endif /* !__mips_n64 */
/*
* Software interrupt handler for queued VM system processing.
*/

View File

@ -1,80 +0,0 @@
/*-
* Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <machine/md_var.h>
#include <sys/queue.h>
struct vm_page;
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
};
struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
void sf_buf_free(struct sf_buf *sf);
/*
* On 32-bit OEA, the only purpose for which sf_buf is used is to implement
* an opaque pointer required by the machine-independent parts of the kernel.
* That pointer references the vm_page that is "mapped" by the sf_buf. The
* actual mapping is provided by the direct virtual-to-physical mapping.
*
* On OEA64 and Book-E, we need to do something a little more complicated. Use
* the runtime-detected hw_direct_map to pick between the two cases. Our
* friends in vm_machdep.c will do the same to ensure nothing gets confused.
*/
static __inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
if (hw_direct_map)
return (VM_PAGE_TO_PHYS((vm_page_t)sf));
return (sf->kva);
}
static __inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
if (hw_direct_map)
return ((vm_page_t)sf);
return (sf->m);
}
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -197,4 +197,18 @@ struct pmap_physseg {
#define ZERO_REGION_SIZE (64 * 1024) /* 64KB */
/*
* On 32-bit OEA, the only purpose for which sf_buf is used is to implement
* an opaque pointer required by the machine-independent parts of the kernel.
* That pointer references the vm_page that is "mapped" by the sf_buf. The
* actual mapping is provided by the direct virtual-to-physical mapping.
*
* On OEA64 and Book-E, we need to do something a little more complicated. Use
* the runtime-detected hw_direct_map to pick between the two cases. Our
* friends in vm_machdep.c will do the same to ensure nothing gets confused.
*/
#define SFBUF
#define SFBUF_NOMD
#define SFBUF_OPTIONAL_DIRECT_MAP hw_direct_map
#endif /* _MACHINE_VMPARAM_H_ */

View File

@ -80,7 +80,6 @@
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/unistd.h>
@ -100,47 +99,6 @@
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
/*
* On systems without a direct mapped region (e.g. PPC64),
* we use the same code as the Book E implementation. Since
* we need to have runtime detection of this, define some machinery
* for sf_bufs in this case, and ignore it on systems with direct maps.
*/
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
LIST_HEAD(sf_head, sf_buf);
/* A hash table of active sendfile(2) buffers */
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;
#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;
/*
* A lock used to synchronize access to the hash table and free list
*/
static struct mtx sf_buf_lock;
#ifdef __powerpc64__
extern uintptr_t tocbase;
#endif
@ -244,124 +202,6 @@ cpu_exit(struct thread *td)
}
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
/* Don't bother on systems with a direct map */
if (hw_direct_map)
return;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
}
sf_buf_alloc_want = 0;
mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}
/*
* Get an sf_buf from the freelist. Will block if none are available.
*
* m is the page to map; flags may contain SFB_NOWAIT (do not sleep) and
* SFB_CATCH (sleep with PCATCH).
*
* Returns the sf_buf mapping m. Returns NULL when no buffer is free and
* SFB_NOWAIT was given, or when msleep() returned an error. When
* hw_direct_map is set, the page pointer itself is returned, cast to an
* sf_buf pointer.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
struct sf_head *hash_list;
struct sf_buf *sf;
int error;
if (hw_direct_map) {
/* Shortcut the direct mapped case */
return ((struct sf_buf *)m);
}
hash_list = &sf_buf_active[SF_BUF_HASH(m)];
mtx_lock(&sf_buf_lock);
/* First look for a buffer that already maps this page. */
LIST_FOREACH(sf, hash_list, list_entry) {
if (sf->m == m) {
sf->ref_count++;
/* 0 -> 1: the buffer was on the free list; take it back off. */
if (sf->ref_count == 1) {
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
}
goto done;
}
}
/* No existing mapping: wait for a free buffer (sf is NULL while waiting). */
while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
if (flags & SFB_NOWAIT)
goto done;
sf_buf_alloc_want++;
SFSTAT_INC(sf_allocwait);
error = msleep(&sf_buf_freelist, &sf_buf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
/*
* If we got a signal, don't risk going back to sleep.
*/
if (error)
goto done;
}
TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
/* A recycled buffer may still be hashed under the page it used to map. */
if (sf->m != NULL)
LIST_REMOVE(sf, list_entry);
LIST_INSERT_HEAD(hash_list, sf, list_entry);
sf->ref_count = 1;
sf->m = m;
nsfbufsused++;
nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/* Enter the new page at this buffer's KVA. */
pmap_qenter(sf->kva, &sf->m, 1);
done:
mtx_unlock(&sf_buf_lock);
return (sf);
}
/*
 * Drop one reference on an sf_buf.
 *
 * When the last reference goes away the buffer is appended to the free
 * list and any thread sleeping in sf_buf_alloc() is woken up.  The mapping
 * itself is not torn down here; the buffer keeps it until sf_buf_alloc()
 * either reactivates the buffer for the same page or recycles it.
 *
 * With a direct map there is no real sf_buf behind the pointer, so this is
 * a no-op.
 */
void
sf_buf_free(struct sf_buf *sf)
{

	if (hw_direct_map)
		return;

	mtx_lock(&sf_buf_lock);
	if (--sf->ref_count == 0) {
		nsfbufsused--;
		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
		if (sf_buf_alloc_want > 0)
			wakeup(&sf_buf_freelist);
	}
	mtx_unlock(&sf_buf_lock);
}
/*
* Software interrupt handler for queued VM system processing.
*/

View File

@ -1,59 +0,0 @@
/*-
* Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
#include <sys/queue.h>
struct vm_page;
/*
* One sf_buf: a kernel virtual address slot and the page currently entered
* there. Buffers that are not in use are linked through free_list.
*/
struct sf_buf {
SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */
struct vm_page *m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
};
/* Obtain an sf_buf mapping page m; flags are SFB_* options. */
struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
/* Release an sf_buf obtained from sf_buf_alloc(). */
void sf_buf_free(struct sf_buf *sf);
/* Return the kernel virtual address recorded in the sf_buf. */
static __inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
return (sf->kva);
}
/* Return the page recorded in the sf_buf as currently mapped. */
static __inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
return (sf->m);
}
#endif /* !_MACHINE_SF_BUF_H_ */

View File

@ -239,4 +239,7 @@ extern vm_offset_t vm_max_kernel_address;
*/
#define ZERO_REGION_SIZE PAGE_SIZE
/* This machine requires the sf_buf hash (kern/subr_sfbuf.c is compiled). */
#define SFBUF
/* This machine does not provide a machine/sf_buf.h. */
#define SFBUF_NOMD
#endif /* !_MACHINE_VMPARAM_H_ */

View File

@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
@ -84,35 +83,6 @@ __FBSDID("$FreeBSD$");
#include <machine/tlb.h>
#include <machine/tstate.h>
/* Default pool size; a build may predefine NSFBUFS to override it. */
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
/* Pool size, high-water mark and current number of sf_bufs in use. */
static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
/* sf_buf_init() is registered below to run through SYSINIT. */
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
/*
* The sf_buf free list bundled with the mutex that protects it: an
* SLIST_HEAD() of free buffers plus the sf_lock mutex in one structure.
*/
static struct {
SLIST_HEAD(, sf_buf) sf_head;
struct mtx sf_lock;
} sf_freelist;
/* Number of threads currently sleeping in sf_buf_alloc() for a buffer. */
static u_int sf_buf_alloc_want;
PMAP_STATS_VAR(uma_nsmall_alloc);
PMAP_STATS_VAR(uma_nsmall_alloc_oc);
PMAP_STATS_VAR(uma_nsmall_free);
@ -417,83 +387,6 @@ is_physical_memory(vm_paddr_t addr)
return (0);
}
/*
* Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
*/
static void
sf_buf_init(void *arg)
{
struct sf_buf *sf_bufs;
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
SLIST_INIT(&sf_freelist.sf_head);
sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
}
sf_buf_alloc_want = 0;
}
/*
 * Take an sf_buf off the free list and map page m into it.
 *
 * Sleeps until a buffer becomes available unless SFB_NOWAIT is set in
 * flags; SFB_CATCH makes the sleep use PCATCH.  Returns the buffer, or
 * NULL when none was free and either SFB_NOWAIT was given or msleep()
 * returned an error.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
	struct sf_buf *sf;
	int prio, slp_error;

	prio = (flags & SFB_CATCH) ? PCATCH | PVM : PVM;

	mtx_lock(&sf_freelist.sf_lock);
	for (;;) {
		sf = SLIST_FIRST(&sf_freelist.sf_head);
		if (sf != NULL)
			break;
		if (flags & SFB_NOWAIT)
			break;
		sf_buf_alloc_want++;
		SFSTAT_INC(sf_allocwait);
		slp_error = msleep(&sf_freelist, &sf_freelist.sf_lock,
		    prio, "sfbufa", 0);
		sf_buf_alloc_want--;

		/* After a failed sleep give up rather than sleeping again. */
		if (slp_error)
			break;
	}
	if (sf == NULL) {
		mtx_unlock(&sf_freelist.sf_lock);
		return (NULL);
	}

	SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
	sf->m = m;
	nsfbufsused++;
	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
	pmap_qenter(sf->kva, &sf->m, 1);
	mtx_unlock(&sf_freelist.sf_lock);
	return (sf);
}
/*
 * Give an sf_buf back to the pool.
 *
 * The mapping at the buffer's KVA is removed first; the buffer is then
 * pushed onto the free list under the list lock and any thread sleeping
 * in sf_buf_alloc() is woken up.
 */
void
sf_buf_free(struct sf_buf *sf)
{

	pmap_qremove(sf->kva, 1);

	mtx_lock(&sf_freelist.sf_lock);
	nsfbufsused--;
	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
	if (sf_buf_alloc_want != 0)
		wakeup(&sf_freelist);
	mtx_unlock(&sf_freelist.sf_lock);
}
void
swi_vm(void *v)
{

View File

@ -1,4 +1,5 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
@ -29,6 +30,146 @@
#ifndef _SYS_SF_BUF_H_
#define _SYS_SF_BUF_H_
/*
* sendfile(2) statistics. Every member is a uint64_t; the SFSTAT_* macros
* index the counters by member offset divided by sizeof(uint64_t).
*/
struct sfstat { /* sendfile statistics */
uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
uint64_t sf_allocfail; /* times sfbuf allocation failed */
uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
};
#ifdef _KERNEL
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
/*
* Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped
* into kernel address space. Note, that they aren't used only
* by sendfile(2)!
*
* Sf_bufs could be implemented as a feature of vm_page_t, but that
* would require growth of the structure. That's why they are implemented
* as a separate hash indexed by vm_page address. Implementation lives in
* kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map,
* so they don't require this hash at all, thus ignore subr_sfbuf.c.
*
* Different 32-bit architectures place different requirements on the sf_buf
* hash and functions. They request features in machine/vmparam.h, which
* enable parts of this file. They can also optionally provide helpers in
* machine/sf_buf.h
*
* Defines are:
* SFBUF This machine requires sf_buf hash.
* subr_sfbuf.c should be compiled.
* SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings,
* that do not invalidate cache on the rest of CPUs.
* SFBUF_NOMD This machine doesn't have machine/sf_buf.h
*
* SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as boolean
* variable that tells whether machine is
* capable of direct map or not at runtime.
* SFBUF_MAP This machine provides its own sf_buf_map() and
* sf_buf_unmap().
* SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page()
* function.
*/
#ifdef SFBUF
#if defined(SMP) && defined(SFBUF_CPUSET)
#include <sys/_cpuset.h>
#endif
#include <sys/queue.h>
/*
* One sf_buf: a kernel virtual address slot, the page currently entered
* there and a reference count on that mapping.
* NOTE(review): list_entry is presumably the hash chain of active buffers
* and free_entry the free list linkage; confirm in kern/subr_sfbuf.c.
*/
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* hash list linkage (see note above) */
TAILQ_ENTRY(sf_buf) free_entry; /* free list linkage (see note above) */
vm_page_t m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#if defined(SMP) && defined(SFBUF_CPUSET)
cpuset_t cpumask; /* where mapping is valid */
#endif
};
#else /* ! SFBUF */
struct sf_buf;
#endif /* SFBUF */
#ifndef SFBUF_NOMD
#include <machine/sf_buf.h>
#endif
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
#include <machine/md_var.h>
#endif
#ifdef SFBUF
/* Obtain an sf_buf for the given page; the int argument carries SFB_* flags. */
struct sf_buf *sf_buf_alloc(struct vm_page *, int);
/* Release an sf_buf obtained from sf_buf_alloc(). */
void sf_buf_free(struct sf_buf *);
/*
 * Return the address at which the page behind sf can be accessed.
 *
 * When SFBUF_OPTIONAL_DIRECT_MAP is defined and evaluates true at runtime,
 * sf is treated as the vm_page pointer itself and the page's physical
 * address is returned.  Otherwise the KVA stored in the sf_buf is returned.
 * NOTE(review): returning the physical address presumes the direct map is
 * an identity mapping on such machines -- confirm.
 */
static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
	if (SFBUF_OPTIONAL_DIRECT_MAP) {
		vm_page_t pg = (vm_page_t)sf;

		return (VM_PAGE_TO_PHYS(pg));
	}
#endif
	return (sf->kva);
}
/*
 * Return the page associated with sf.
 *
 * When SFBUF_OPTIONAL_DIRECT_MAP is defined and evaluates true at runtime,
 * sf is the vm_page pointer in disguise and is simply cast back.  Otherwise
 * the page recorded in the sf_buf is returned.
 */
static inline vm_page_t
sf_buf_page(struct sf_buf *sf)
{
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
	if (SFBUF_OPTIONAL_DIRECT_MAP) {
		vm_page_t pg = (vm_page_t)sf;

		return (pg);
	}
#endif
	return (sf->m);
}
#ifndef SFBUF_MAP
#include <vm/pmap.h>
/*
* Default mapping hook, used when the machine does not define SFBUF_MAP:
* enter the sf_buf's page at its KVA with pmap_qenter(). flags is ignored.
*/
static inline void
sf_buf_map(struct sf_buf *sf, int flags)
{
pmap_qenter(sf->kva, &sf->m, 1);
}
/*
* Default unmap hook, used when the machine does not define SFBUF_MAP:
* does nothing and always returns 0.
* NOTE(review): the meaning of the return value is defined by the caller
* in kern/subr_sfbuf.c -- confirm there.
*/
static inline int
sf_buf_unmap(struct sf_buf *sf)
{
return (0);
}
#endif /* SFBUF_MAP */
/* Declared only on SMP kernels whose machine defines SFBUF_CPUSET. */
#if defined(SMP) && defined(SFBUF_CPUSET)
void sf_buf_shootdown(struct sf_buf *, int);
#endif
/* Declared only when the machine defines SFBUF_PROCESS_PAGE. */
#ifdef SFBUF_PROCESS_PAGE
boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
#endif
#else /* ! SFBUF */
/*
 * Stub used when SFBUF is not defined: no buffer is allocated.  The page
 * pointer is handed back unchanged as an opaque sf_buf handle; pri is
 * ignored.
 */
static inline struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{
	struct sf_buf *handle;

	handle = (struct sf_buf *)m;
	return (handle);
}
/*
* Stub used when SFBUF is not defined: sf_buf_alloc() allocated nothing,
* so there is nothing to release.
*/
static inline void
sf_buf_free(struct sf_buf *sf)
{
}
#endif /* SFBUF */
/*
* Options to sf_buf_alloc() are specified through its flags argument. This
* argument's value should be the result of a bitwise or'ing of one or more
@ -40,19 +181,6 @@
#define SFB_DEFAULT 0
#define SFB_NOWAIT 4 /* Return NULL if all bufs are used. */
struct vm_page;
/*
* sendfile(2) statistics. Every member is a uint64_t; the SFSTAT_* macros
* index the counters by member offset divided by sizeof(uint64_t).
*/
struct sfstat { /* sendfile statistics */
uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
uint64_t sf_allocfail; /* times sfbuf allocation failed */
uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
};
#ifdef _KERNEL
#include <machine/sf_buf.h>
#include <sys/systm.h>
#include <sys/counter.h>
extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
#define SFSTAT_ADD(name, val) \
counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\