Add support for using unmapped mbufs with sendfile(2).
This can be enabled at runtime via the kern.ipc.mb_use_ext_pgs sysctl. It is disabled by default.

Submitted by: gallatin (earlier version)
Reviewed by: gallatin, hselasky, rrs
Relnotes: yes
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D20616
This commit is contained in:
parent
82334850ea
commit
cec06a3edc
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=349530
@ -112,6 +112,11 @@ int nmbjumbop; /* limits number of page size jumbo clusters */
|
||||
int nmbjumbo9; /* limits number of 9k jumbo clusters */
|
||||
int nmbjumbo16; /* limits number of 16k jumbo clusters */
|
||||
|
||||
bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile */
|
||||
SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
|
||||
&mb_use_ext_pgs, 0,
|
||||
"Use unmapped mbufs for sendfile(2)");
|
||||
|
||||
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
|
||||
|
||||
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
|
||||
|
@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/systm.h>
|
||||
#include <sys/capsicum.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/sysproto.h>
|
||||
@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#define EXT_FLAG_SYNC EXT_FLAG_VENDOR1
|
||||
#define EXT_FLAG_NOCACHE EXT_FLAG_VENDOR2
|
||||
#define EXT_FLAG_CACHE_LAST EXT_FLAG_VENDOR3
|
||||
|
||||
/*
|
||||
* Structure describing a single sendfile(2) I/O, which may consist of
|
||||
@ -201,6 +203,39 @@ sendfile_free_mext(struct mbuf *m)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release the pages referenced by an EXT_PGS (unmapped) mbuf that
 * vn_sendfile() built, and drop the mbuf's reference on the optional
 * sendfile_sync object.  vn_sendfile() passes this function to
 * mb_alloc_ext_pgs() when it allocates the mbuf.
 *
 * m must be an M_EXT mbuf whose ext_type is EXT_PGS; this is asserted.
 */
static void
|
||||
sendfile_free_mext_pg(struct mbuf *m)
|
||||
{
|
||||
struct mbuf_ext_pgs *ext_pgs;
|
||||
vm_page_t pg;
|
||||
int i;
|
||||
bool nocache, cache_last;
|
||||
|
||||
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
|
||||
("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
|
||||
|
||||
/* Per-mbuf policy bits that vn_sendfile() stored in ext_flags. */
nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
|
||||
cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
|
||||
ext_pgs = m->m_ext.ext_pgs;
|
||||
|
||||
/* Hand every page recorded in the mbuf to sendfile_free_page(). */
for (i = 0; i < ext_pgs->npgs; i++) {
|
||||
/*
 * With EXT_FLAG_CACHE_LAST set, the final page is released with
 * nocache == false even if EXT_FLAG_NOCACHE is set.
 */
if (cache_last && i == ext_pgs->npgs - 1)
|
||||
nocache = false;
|
||||
/* The mbuf stores physical addresses; map back to the vm_page. */
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
|
||||
sendfile_free_page(pg, nocache);
|
||||
}
|
||||
|
||||
/*
 * If the mbuf was tagged EXT_FLAG_SYNC, ext_arg2 points at the
 * sendfile_sync whose count vn_sendfile() incremented for this mbuf.
 * Drop that count under the mutex and signal the condition variable
 * when it reaches zero.
 */
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
|
||||
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
|
||||
|
||||
mtx_lock(&sfs->mtx);
|
||||
KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
|
||||
if (--sfs->count == 0)
|
||||
cv_signal(&sfs->cv);
|
||||
mtx_unlock(&sfs->mtx);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function to calculate how much data to put into page i of n.
|
||||
* Only first and last pages are special.
|
||||
@ -283,8 +318,6 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
|
||||
|
||||
CURVNET_SET(so->so_vnet);
|
||||
if (sfio->error) {
|
||||
struct mbuf *m;
|
||||
|
||||
/*
|
||||
* I/O operation failed. The state of data in the socket
|
||||
* is now inconsistent, and all what we can do is to tear
|
||||
@ -299,9 +332,7 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
|
||||
so->so_proto->pr_usrreqs->pru_abort(so);
|
||||
so->so_error = EIO;
|
||||
|
||||
m = sfio->m;
|
||||
for (int i = 0; i < sfio->npages; i++)
|
||||
m = m_free(m);
|
||||
mb_free_notready(sfio->m, sfio->npages);
|
||||
} else
|
||||
(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
|
||||
sfio->npages);
|
||||
@ -540,13 +571,15 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
struct vnode *vp;
|
||||
struct vm_object *obj;
|
||||
struct socket *so;
|
||||
struct mbuf_ext_pgs *ext_pgs;
|
||||
struct mbuf *m, *mh, *mhtail;
|
||||
struct sf_buf *sf;
|
||||
struct shmfd *shmfd;
|
||||
struct sendfile_sync *sfs;
|
||||
struct vattr va;
|
||||
off_t off, sbytes, rem, obj_size;
|
||||
int error, softerr, bsize, hdrlen;
|
||||
int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
|
||||
bool use_ext_pgs;
|
||||
|
||||
obj = NULL;
|
||||
so = NULL;
|
||||
@ -554,6 +587,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
sfs = NULL;
|
||||
hdrlen = sbytes = 0;
|
||||
softerr = 0;
|
||||
use_ext_pgs = false;
|
||||
|
||||
error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
|
||||
if (error != 0)
|
||||
@ -714,6 +748,17 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
|
||||
if (space > rem)
|
||||
space = rem;
|
||||
else if (space > PAGE_SIZE) {
|
||||
/*
|
||||
* Use page boundaries when possible for large
|
||||
* requests.
|
||||
*/
|
||||
if (off & PAGE_MASK)
|
||||
space -= (PAGE_SIZE - (off & PAGE_MASK));
|
||||
space = trunc_page(space);
|
||||
if (off & PAGE_MASK)
|
||||
space += (PAGE_SIZE - (off & PAGE_MASK));
|
||||
}
|
||||
|
||||
npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
|
||||
|
||||
@ -751,6 +796,22 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
* dumped into socket buffer.
|
||||
*/
|
||||
pa = sfio->pa;
|
||||
|
||||
/*
|
||||
* Use unmapped mbufs if enabled for TCP. Unmapped
|
||||
* bufs are restricted to TCP as that is what has been
|
||||
* tested. In particular, unmapped mbufs have not
|
||||
* been tested with UNIX-domain sockets.
|
||||
*/
|
||||
if (mb_use_ext_pgs &&
|
||||
so->so_proto->pr_protocol == IPPROTO_TCP) {
|
||||
use_ext_pgs = true;
|
||||
max_pgs = MBUF_PEXT_MAX_PGS;
|
||||
|
||||
/* Start at last index, to wrap on first use. */
|
||||
ext_pgs_idx = max_pgs - 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < npages; i++) {
|
||||
struct mbuf *m0;
|
||||
|
||||
@ -766,6 +827,66 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
break;
|
||||
}
|
||||
|
||||
if (use_ext_pgs) {
|
||||
off_t xfs;
|
||||
|
||||
ext_pgs_idx++;
|
||||
if (ext_pgs_idx == max_pgs) {
|
||||
m0 = mb_alloc_ext_pgs(M_WAITOK, false,
|
||||
sendfile_free_mext_pg);
|
||||
|
||||
if (flags & SF_NOCACHE) {
|
||||
m0->m_ext.ext_flags |=
|
||||
EXT_FLAG_NOCACHE;
|
||||
|
||||
/*
|
||||
* See comment below regarding
|
||||
* ignoring SF_NOCACHE for the
|
||||
* last page.
|
||||
*/
|
||||
if ((npages - i <= max_pgs) &&
|
||||
((off + space) & PAGE_MASK) &&
|
||||
(rem > space || rhpages > 0))
|
||||
m0->m_ext.ext_flags |=
|
||||
EXT_FLAG_CACHE_LAST;
|
||||
}
|
||||
if (sfs != NULL) {
|
||||
m0->m_ext.ext_flags |=
|
||||
EXT_FLAG_SYNC;
|
||||
m0->m_ext.ext_arg2 = sfs;
|
||||
mtx_lock(&sfs->mtx);
|
||||
sfs->count++;
|
||||
mtx_unlock(&sfs->mtx);
|
||||
}
|
||||
ext_pgs = m0->m_ext.ext_pgs;
|
||||
if (i == 0)
|
||||
sfio->m = m0;
|
||||
ext_pgs_idx = 0;
|
||||
|
||||
/* Append to mbuf chain. */
|
||||
if (mtail != NULL)
|
||||
mtail->m_next = m0;
|
||||
else
|
||||
m = m0;
|
||||
mtail = m0;
|
||||
ext_pgs->first_pg_off =
|
||||
vmoff(i, off) & PAGE_MASK;
|
||||
}
|
||||
if (nios) {
|
||||
mtail->m_flags |= M_NOTREADY;
|
||||
ext_pgs->nrdy++;
|
||||
}
|
||||
|
||||
ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]);
|
||||
ext_pgs->npgs++;
|
||||
xfs = xfsize(i, npages, off, space);
|
||||
ext_pgs->last_pg_len = xfs;
|
||||
MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs);
|
||||
mtail->m_len += xfs;
|
||||
mtail->m_ext.ext_size += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a sendfile buf. When allocating the
|
||||
* first buffer for mbuf chain, we usually
|
||||
|
@ -1129,6 +1129,7 @@ extern int max_hdr; /* Largest link + protocol header */
|
||||
extern int max_linkhdr; /* Largest link-level header */
|
||||
extern int max_protohdr; /* Largest protocol header */
|
||||
extern int nmbclusters; /* Maximum number of clusters */
|
||||
extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */
|
||||
|
||||
/*-
|
||||
* Network packets may have annotations attached by affixing a list of
|
||||
|
Loading…
Reference in New Issue
Block a user