From 411d10a600b6d3bf43415ba408b7e221f8688bc2 Mon Sep 17 00:00:00 2001
From: Alan Cox
Date: Fri, 29 Aug 2003 20:04:10 +0000
Subject: [PATCH] Migrate the sf_buf allocator that is used by sendfile(2)
 and zero-copy sockets into machine-dependent files.

The rationale for this migration is illustrated by the modified amd64
allocator. It uses amd64's direct map to avoid ephemeral mappings in the
kernel's address space. On an SMP, the ephemeral mappings result in an IPI
for TLB shootdown for each transmitted page. Yuck.

Maintainers of other 64-bit platforms with direct maps should be able to
use the amd64 allocator as a reference implementation.
---
 sys/alpha/alpha/vm_machdep.c     | 101 ++++++++++++++++++++++++++++++
 sys/amd64/amd64/vm_machdep.c     |  95 ++++++++++++++++++++++++++++
 sys/i386/i386/vm_machdep.c       | 101 ++++++++++++++++++++++++++++++
 sys/ia64/ia64/vm_machdep.c       | 101 ++++++++++++++++++++++++++++++
 sys/kern/uipc_syscalls.c         |  99 -----------------------------
 sys/powerpc/aim/vm_machdep.c     | 101 ++++++++++++++++++++++++++++++
 sys/powerpc/powerpc/vm_machdep.c | 101 ++++++++++++++++++++++++++++++
 sys/sparc64/sparc64/vm_machdep.c | 104 +++++++++++++++++++++++++++++++
 8 files changed, 704 insertions(+), 99 deletions(-)

diff --git a/sys/alpha/alpha/vm_machdep.c b/sys/alpha/alpha/vm_machdep.c
index 9139dc7d694b..05b9728b4b79 100644
--- a/sys/alpha/alpha/vm_machdep.c
+++ b/sys/alpha/alpha/vm_machdep.c
@@ -82,6 +82,8 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -101,6 +103,20 @@ __FBSDID("$FreeBSD$");
 #include
 
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -369,6 +385,91 @@ cpu_reset()
 	prom_halt(0);
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
  */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index aab07a8f18af..d08bb2bd75e1 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -57,7 +57,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
@@ -78,6 +80,19 @@ __FBSDID("$FreeBSD$");
 #include
 
 static void cpu_reset_real(void);
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
@@ -348,6 +363,86 @@ cpu_reset_real()
 	while(1);
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++)
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		sf->kva = PHYS_TO_DMAP(m->phys_addr);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
  */
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 7cec62039f64..19114e1b6bd7 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -62,8 +62,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 #include
@@ -95,6 +97,20 @@ static void cpu_reset_proxy(void);
 static u_int cpu_reset_proxyid;
 static volatile u_int cpu_reset_proxy_active;
 #endif
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 extern int _ucodesel, _udatasel;
 
 /*
@@ -547,6 +563,91 @@ cpu_reset_real()
 	while(1);
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
 */
diff --git a/sys/ia64/ia64/vm_machdep.c b/sys/ia64/ia64/vm_machdep.c
index ae8ff3b32cad..5d771ed50569 100644
--- a/sys/ia64/ia64/vm_machdep.c
+++ b/sys/ia64/ia64/vm_machdep.c
@@ -78,6 +78,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -98,6 +100,20 @@
 #include
 
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 void
 cpu_thread_exit(struct thread *td)
 {
@@ -306,6 +322,91 @@ cpu_sched_exit(td)
 {
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
 */
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index c9dff02f5041..2d08b7e32132 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -77,9 +77,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
-
 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
 
@@ -90,17 +87,6 @@ static int getsockname1(struct thread *td, struct getsockname_args *uap,
 static int getpeername1(struct thread *td, struct getpeername_args *uap,
     int compat);
 
-/*
- * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
- * sf_freelist head with the sf_lock mutex.
- */
-static struct {
-	SLIST_HEAD(, sf_buf) sf_head;
-	struct mtx sf_lock;
-} sf_freelist;
-
-static u_int sf_buf_alloc_want;
-
 /*
  * System call interface to the socket abstraction.
 */
@@ -1631,91 +1617,6 @@ getsockaddr(namp, uaddr, len)
 	return error;
 }
 
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
-	struct sf_buf *sf_bufs;
-	vm_offset_t sf_base;
-	int i;
-
-	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
-	mtx_lock(&sf_freelist.sf_lock);
-	SLIST_INIT(&sf_freelist.sf_head);
-	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
-	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
-	    M_NOWAIT | M_ZERO);
-	for (i = 0; i < nsfbufs; i++) {
-		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
-		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
-	}
-	sf_buf_alloc_want = 0;
-	mtx_unlock(&sf_freelist.sf_lock);
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m)
-{
-	struct sf_buf *sf;
-	int error;
-
-	mtx_lock(&sf_freelist.sf_lock);
-	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
-		sf_buf_alloc_want++;
-		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
-		    "sfbufa", 0);
-		sf_buf_alloc_want--;
-
-		/*
-		 * If we got a signal, don't risk going back to sleep.
-		 */
-		if (error)
-			break;
-	}
-	if (sf != NULL) {
-		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
-		sf->m = m;
-		pmap_qenter(sf->kva, &sf->m, 1);
-	}
-	mtx_unlock(&sf_freelist.sf_lock);
-	return (sf);
-}
-
-/*
- * Detatch mapped page and release resources back to the system.
- */
-void
-sf_buf_free(void *addr, void *args)
-{
-	struct sf_buf *sf;
-	struct vm_page *m;
-
-	sf = args;
-	pmap_qremove((vm_offset_t)addr, 1);
-	m = sf->m;
-	vm_page_lock_queues();
-	vm_page_unwire(m, 0);
-	/*
-	 * Check for the object going away on us. This can
-	 * happen since we don't hold a reference to it.
-	 * If so, we're responsible for freeing the page.
-	 */
-	if (m->wire_count == 0 && m->object == NULL)
-		vm_page_free(m);
-	vm_page_unlock_queues();
-	sf->m = NULL;
-	mtx_lock(&sf_freelist.sf_lock);
-	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
-	if (sf_buf_alloc_want > 0)
-		wakeup_one(&sf_freelist);
-	mtx_unlock(&sf_freelist.sf_lock);
-}
-
 /*
  * sendfile(2)
  *
diff --git a/sys/powerpc/aim/vm_machdep.c b/sys/powerpc/aim/vm_machdep.c
index 3f83f8843dc2..cef8327d7ad6 100644
--- a/sys/powerpc/aim/vm_machdep.c
+++ b/sys/powerpc/aim/vm_machdep.c
@@ -81,6 +81,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -101,6 +103,20 @@
 #include
 
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -219,6 +235,91 @@ cpu_reset()
 	OF_exit();
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
 */
diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c
index 3f83f8843dc2..cef8327d7ad6 100644
--- a/sys/powerpc/powerpc/vm_machdep.c
+++ b/sys/powerpc/powerpc/vm_machdep.c
@@ -81,6 +81,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -101,6 +103,20 @@
 #include
 
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -219,6 +235,91 @@ cpu_reset()
 	OF_exit();
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 /*
  * Software interrupt handler for queued VM system processing.
 */
diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c
index 983e733894e4..2abca7aa3d88 100644
--- a/sys/sparc64/sparc64/vm_machdep.c
+++ b/sys/sparc64/sparc64/vm_machdep.c
@@ -52,7 +52,11 @@
 #include
 #include
 #include
+#include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -63,6 +67,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -81,6 +86,20 @@
 #include
 #include
 
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+/*
+ * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
+ * sf_freelist head protected by the sf_lock mutex.
+ */
+static struct {
+	SLIST_HEAD(, sf_buf) sf_head;
+	struct mtx sf_lock;
+} sf_freelist;
+
+static u_int sf_buf_alloc_want;
+
 PMAP_STATS_VAR(uma_nsmall_alloc);
 PMAP_STATS_VAR(uma_nsmall_alloc_oc);
 PMAP_STATS_VAR(uma_nsmall_free);
@@ -330,6 +349,91 @@ is_physical_memory(vm_paddr_t addr)
 	return (0);
 }
 
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+	struct sf_buf *sf_bufs;
+	vm_offset_t sf_base;
+	int i;
+
+	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INIT(&sf_freelist.sf_head);
+	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+	    M_NOWAIT | M_ZERO);
+	for (i = 0; i < nsfbufs; i++) {
+		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
+	}
+	sf_buf_alloc_want = 0;
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m)
+{
+	struct sf_buf *sf;
+	int error;
+
+	mtx_lock(&sf_freelist.sf_lock);
+	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
+		sf_buf_alloc_want++;
+		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
+		    "sfbufa", 0);
+		sf_buf_alloc_want--;
+
+		/*
+		 * If we got a signal, don't risk going back to sleep.
+		 */
+		if (error)
+			break;
+	}
+	if (sf != NULL) {
+		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
+		sf->m = m;
+		pmap_qenter(sf->kva, &sf->m, 1);
+	}
+	mtx_unlock(&sf_freelist.sf_lock);
+	return (sf);
+}
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(void *addr, void *args)
+{
+	struct sf_buf *sf;
+	struct vm_page *m;
+
+	sf = args;
+	pmap_qremove((vm_offset_t)addr, 1);
+	m = sf->m;
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	/*
+	 * Check for the object going away on us. This can
+	 * happen since we don't hold a reference to it.
+	 * If so, we're responsible for freeing the page.
+	 */
+	if (m->wire_count == 0 && m->object == NULL)
+		vm_page_free(m);
+	vm_page_unlock_queues();
+	sf->m = NULL;
+	mtx_lock(&sf_freelist.sf_lock);
+	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
+	if (sf_buf_alloc_want > 0)
+		wakeup_one(&sf_freelist);
+	mtx_unlock(&sf_freelist.sf_lock);
+}
+
 void
 swi_vm(void *v)
 {
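
Usage sketch (editorial addition, not part of the patch): the three functions
moved by this change form a small API whose consumer is the sendfile(2) path
in sys/kern/uipc_syscalls.c. The sketch below shows the intended lifecycle:
wire a page, map it with sf_buf_alloc(), attach the mapping to an mbuf as
read-only external storage, and let the mbuf free routine, sf_buf_free(),
unwire the page and recycle the sf_buf. The helper name send_one_page() is
hypothetical, and the MEXTADD()/MGETHDR() usage and the M_TRYWAIT and
EXT_SFBUF constants are assumptions based on the FreeBSD 5.x-era mbuf API,
so treat the details as illustrative rather than verbatim kernel code.

/*
 * Hypothetical helper: transmit one VM page over a socket using the
 * sf_buf API above. Error handling is minimal for clarity.
 */
static int
send_one_page(struct socket *so, struct vm_page *pg)
{
	struct sf_buf *sf;
	struct mbuf *m;

	/* Wire the page so it cannot be paged out while it is mapped. */
	vm_page_lock_queues();
	vm_page_wire(pg);
	vm_page_unlock_queues();

	/*
	 * May sleep until an sf_buf becomes free; a NULL return means
	 * the sleep was interrupted by a signal.
	 */
	sf = sf_buf_alloc(pg);
	if (sf == NULL) {
		vm_page_lock_queues();
		vm_page_unwire(pg, 0);
		vm_page_unlock_queues();
		return (EINTR);
	}

	MGETHDR(m, M_TRYWAIT, MT_DATA);
	if (m == NULL) {
		sf_buf_free((void *)sf->kva, sf);	/* also unwires pg */
		return (ENOBUFS);
	}

	/*
	 * Attach the mapped page as read-only external storage. The mbuf
	 * layer calls sf_buf_free(sf->kva, sf) once the data has been
	 * transmitted, returning the sf_buf to the freelist. On amd64
	 * sf->kva lies in the direct map, so neither this path nor the
	 * free routine touches the kernel map, and no TLB-shootdown IPIs
	 * are generated.
	 */
	MEXTADD(m, (caddr_t)sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
	    EXT_SFBUF);
	m->m_len = m->m_pkthdr.len = PAGE_SIZE;

	return (sosend(so, NULL, NULL, m, NULL, 0, curthread));
}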