Improvements to sendfile(2) mbuf free routine.

o Fall back to the default m_ext free mechanism, using the function
  pointer in m_ext.ext_free, and remove sf_ext_free(), which was called
  directly from the mbuf code.  Testing on modern CPUs showed no
  regression.  (A simplified model of this dispatch follows the change
  summary below.)
o Provide the internally used flag EXT_FLAG_SYNC to mark that an I/O
  uses the SF_SYNC flag.  Absence of the flag lets us skip
  dereferencing ext_arg2, saving a cache line miss.
o Create the function sendfile_free_page(), which will later be used
  for multi-page mbufs.  For now the compiler inlines it into
  sendfile_free_mext().

In collaboration with:	gallatin
Differential Revision:	https://reviews.freebsd.org/D12615
Gleb Smirnoff 2017-10-09 21:06:16 +00:00
parent 07e87a1d55
commit 9c82bec42d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=324448
3 changed files with 53 additions and 75 deletions
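
For readers unfamiliar with the mbuf external-storage machinery, here is
the dispatch change from the first bullet as a simplified user-space
model.  This is only a sketch, not the kernel code: the struct and all
model_* names are invented for illustration, standing in for struct
mbuf, sendfile_free_mext() and the default path of mb_free_ext().

#include <stdio.h>

struct mbuf_model {
	void (*ext_free)(struct mbuf_model *);	/* generic free callback */
	void *ext_arg1;		/* for sendfile: the struct sf_buf */
	void *ext_arg2;		/* for sendfile: optional sendfile_sync */
};

/* Stands in for sendfile_free_mext(): only sendfile knows the args. */
static void
model_sendfile_free(struct mbuf_model *m)
{

	printf("sendfile frees its storage: arg1=%p arg2=%p\n",
	    m->ext_arg1, m->ext_arg2);
}

/*
 * Stands in for the default path of mb_free_ext(): it invokes the
 * stored pointer and needs no EXT_SFBUF/EXT_SFBUF_NOCACHE cases.
 */
static void
model_mb_free_ext(struct mbuf_model *m)
{

	m->ext_free(m);
}

int
main(void)
{
	struct mbuf_model m = { model_sendfile_free, &m, NULL };

	model_mb_free_ext(&m);
	return (0);
}

The cost of the indirect call is why the commit message notes that
testing on modern CPUs showed no regression against the old switch.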

sys/kern/uipc_mbuf.c

@@ -675,14 +675,6 @@ mb_free_ext(struct mbuf *m)
 			uma_zfree(zone_mbuf, mref);
 			break;
-		case EXT_SFBUF:
-			sf_ext_free(mref->m_ext.ext_arg1, mref->m_ext.ext_arg2);
-			uma_zfree(zone_mbuf, mref);
-			break;
-		case EXT_SFBUF_NOCACHE:
-			sf_ext_free_nocache(mref->m_ext.ext_arg1,
-			    mref->m_ext.ext_arg2);
-			uma_zfree(zone_mbuf, mref);
-			break;
 		case EXT_NET_DRV:
 		case EXT_MOD_TYPE:
 		case EXT_DISPOSABLE:

sys/kern/kern_sendfile.c

@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
+#define	EXT_FLAG_SYNC		EXT_FLAG_VENDOR1
+#define	EXT_FLAG_NOCACHE	EXT_FLAG_VENDOR2
+
 /*
  * Structure describing a single sendfile(2) I/O, which may consist of
  * several underlying pager I/Os.
@@ -122,63 +125,53 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
  * Detach mapped page and release resources back to the system.  Called
  * by mbuf(9) code when last reference to a page is freed.
  */
-void
-sf_ext_free(void *arg1, void *arg2)
+static void
+sendfile_free_page(vm_page_t pg, bool nocache)
 {
-	struct sf_buf *sf = arg1;
-	struct sendfile_sync *sfs = arg2;
-	vm_page_t pg = sf_buf_page(sf);
-
-	sf_buf_free(sf);
-
 	vm_page_lock(pg);
 	/*
-	 * Check for the object going away on us.  This can
-	 * happen since we don't hold a reference to it.
-	 * If so, we're responsible for freeing the page.
+	 * In either case check for the object going away on us.  This can
+	 * happen since we don't hold a reference to it.  If so, we're
+	 * responsible for freeing the page.  In 'noncache' case try to free
+	 * the page, but only if it is cheap to.
	 */
-	if (vm_page_unwire(pg, PQ_INACTIVE) && pg->object == NULL)
-		vm_page_free(pg);
-	vm_page_unlock(pg);
-
-	if (sfs != NULL) {
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		if (--sfs->count == 0)
-			cv_signal(&sfs->cv);
-		mtx_unlock(&sfs->mtx);
-	}
-}
-
-/*
- * Same as above, but forces the page to be detached from the object
- * and go into free pool.
- */
-void
-sf_ext_free_nocache(void *arg1, void *arg2)
-{
-	struct sf_buf *sf = arg1;
-	struct sendfile_sync *sfs = arg2;
-	vm_page_t pg = sf_buf_page(sf);
-
-	sf_buf_free(sf);
-
-	vm_page_lock(pg);
-	if (vm_page_unwire(pg, PQ_NONE)) {
+	if (vm_page_unwire(pg, nocache ? PQ_NONE : PQ_INACTIVE)) {
 		vm_object_t obj;
 
 		/* Try to free the page, but only if it is cheap to. */
 		if ((obj = pg->object) == NULL)
 			vm_page_free(pg);
-		else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
-			vm_page_free(pg);
-			VM_OBJECT_WUNLOCK(obj);
-		} else
-			vm_page_deactivate(pg);
+		else if (nocache) {
+			if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
+				vm_page_free(pg);
+				VM_OBJECT_WUNLOCK(obj);
+			} else
+				vm_page_deactivate(pg);
+		}
 	}
 	vm_page_unlock(pg);
+}
+
+static void
+sendfile_free_mext(struct mbuf *m)
+{
+	struct sf_buf *sf;
+	vm_page_t pg;
+	bool nocache;
+
+	KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
+	    ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
+
+	sf = m->m_ext.ext_arg1;
+	pg = sf_buf_page(sf);
+	nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
+
+	sf_buf_free(sf);
+	sendfile_free_page(pg, nocache);
+
+	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
+		struct sendfile_sync *sfs = m->m_ext.ext_arg2;
+
-	if (sfs != NULL) {
 		mtx_lock(&sfs->mtx);
 		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
 		if (--sfs->count == 0)
@@ -782,7 +775,9 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
 			m0->m_ext.ext_buf = (char *)sf_buf_kva(sf);
 			m0->m_ext.ext_size = PAGE_SIZE;
 			m0->m_ext.ext_arg1 = sf;
-			m0->m_ext.ext_arg2 = sfs;
+			m0->m_ext.ext_type = EXT_SFBUF;
+			m0->m_ext.ext_flags = EXT_FLAG_EMBREF;
+			m0->m_ext.ext_free = sendfile_free_mext;
 			/*
 			 * SF_NOCACHE sets the page as being freed upon send.
 			 * However, we ignore it for the last page in 'space',
@@ -790,14 +785,18 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
 			 * send (rem > space), or if we have readahead
 			 * configured (rhpages > 0).
 			 */
-			if ((flags & SF_NOCACHE) == 0 ||
-			    (i == npages - 1 &&
-			    ((off + space) & PAGE_MASK) &&
-			    (rem > space || rhpages > 0)))
-				m0->m_ext.ext_type = EXT_SFBUF;
-			else
-				m0->m_ext.ext_type = EXT_SFBUF_NOCACHE;
-			m0->m_ext.ext_flags = EXT_FLAG_EMBREF;
+			if ((flags & SF_NOCACHE) &&
+			    (i != npages - 1 ||
+			    !((off + space) & PAGE_MASK) ||
+			    !(rem > space || rhpages > 0)))
+				m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
+			if (sfs != NULL) {
+				m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
+				m0->m_ext.ext_arg2 = sfs;
+				mtx_lock(&sfs->mtx);
+				sfs->count++;
+				mtx_unlock(&sfs->mtx);
+			}
 			m0->m_ext.ext_count = 1;
 			m0->m_flags |= (M_EXT | M_RDONLY);
 			if (nios)
@@ -815,12 +814,6 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
 			else
 				m = m0;
 			mtail = m0;
-
-			if (sfs != NULL) {
-				mtx_lock(&sfs->mtx);
-				sfs->count++;
-				mtx_unlock(&sfs->mtx);
-			}
 		}
 
 		if (vp != NULL)

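A note on the SF_NOCACHE hunk above: the old code picked the cached type
(EXT_SFBUF) and fell back to EXT_SFBUF_NOCACHE, while the new code sets
EXT_FLAG_NOCACHE directly, so the condition had to be inverted via De
Morgan's laws.  The following standalone check (a sketch; A, B, C and D
are stand-ins for the four subexpressions: SF_NOCACHE requested, last
page of the I/O, partially filled page, and rem > space || rhpages > 0)
verifies that the two forms agree on all inputs:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{

	for (int mask = 0; mask < 16; mask++) {
		bool A = mask & 1, B = mask & 2, C = mask & 4, D = mask & 8;
		/* Old code kept the page cached when !A || (B && C && D). */
		bool old_nocache = !(!A || (B && C && D));
		/* New code sets NOCACHE when A && (!B || !C || !D). */
		bool new_nocache = A && (!B || !C || !D);

		assert(old_nocache == new_nocache);
	}
	printf("old and new SF_NOCACHE conditions agree in all 16 cases\n");
	return (0);
}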
sys/sys/mbuf.h

@@ -426,7 +426,6 @@ struct mbuf {
 #define	EXT_JUMBO16	5	/* jumbo cluster 16184 bytes */
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference */
-#define	EXT_SFBUF_NOCACHE 8	/* sendfile(2)'s sf_buf not to be cached */
 
 #define	EXT_VENDOR1	224	/* for vendor-internal use */
 #define	EXT_VENDOR2	225	/* for vendor-internal use */
@@ -471,12 +470,6 @@ struct mbuf {
     "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
     "\30EXT_FLAG_EXP4"
 
-/*
- * External reference/free functions.
- */
-void sf_ext_free(void *, void *);
-void sf_ext_free_nocache(void *, void *);
-
 /*
  * Flags indicating checksum, segmentation and other offload work to be
  * done, or already done, by hardware or lower layers. It is split into
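
For context on the EXT_FLAG_SYNC path added above: a sendfile(2) call
with SF_SYNC allocates a struct sendfile_sync, increments its count for
every mbuf it sends, and sleeps until the last freed mbuf drops the
count to zero.  Below is a user-space sketch of that handshake using
POSIX threads; the kernel uses mtx(9) and cv(9) instead, and the
sync_model names here are invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct sync_model {
	pthread_mutex_t mtx;
	pthread_cond_t cv;
	int count;	/* outstanding mbufs of one sendfile(2) call */
};

/* What sendfile_free_mext() does when EXT_FLAG_SYNC is set. */
static void
sync_release(struct sync_model *s)
{

	pthread_mutex_lock(&s->mtx);
	if (--s->count == 0)
		pthread_cond_signal(&s->cv);
	pthread_mutex_unlock(&s->mtx);
}

/* What vn_sendfile() does before returning when SF_SYNC was passed. */
static void
sync_wait(struct sync_model *s)
{

	pthread_mutex_lock(&s->mtx);
	while (s->count > 0)
		pthread_cond_wait(&s->cv, &s->mtx);
	pthread_mutex_unlock(&s->mtx);
}

int
main(void)
{
	struct sync_model s =
	    { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 2 };

	sync_release(&s);	/* the two in-flight pages are freed */
	sync_release(&s);
	sync_wait(&s);		/* returns at once: count already zero */
	printf("all pages freed\n");
	return (0);
}

Keeping the count bump next to the mbuf setup (as the second
kern_sendfile.c hunk now does) also means the EXT_FLAG_SYNC test in the
free routine can skip touching ext_arg2 entirely when the flag is
clear, which is the cache line miss saving named in the commit message.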