Rework the handling of the tmpfs node backing swap object and the tmpfs
vnode's v_object to avoid double-buffering: use the same object both as
the backing store for the tmpfs node and as the v_object.

Besides reducing memory use by up to 2x when mapping files from tmpfs,
this also halves the number of bytes copied by tmpfs read and write
operations.

The VM subsystem was already slightly adapted to tolerate an OBJT_SWAP
object as the v_object.  Now vm_object_deallocate() is modified to not
reinstantiate the OBJ_ONEMAPPING flag and to help the VFS correctly
handle the VV_TEXT flag on the last dereference of the tmpfs backing
object.

Reviewed by:	alc
Tested by:	pho, bf
MFC after:	1 month
Konstantin Belousov 2013-04-28 19:38:59 +00:00
parent e5f299ff76
commit 6f2af3fcf3
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=250030
4 changed files with 140 additions and 168 deletions
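
The core of the change is that a tmpfs regular file no longer keeps its data
in one swap object while the vnode's page cache lives in a second object: the
node's tn_aobj is installed directly as vp->v_object.  The userspace toy model
below (hypothetical types toy_obj/toy_node/toy_vnode, not the kernel
structures) only illustrates why sharing the object removes the extra copy.

/*
 * Userspace toy model of the single-object layout; the struct and field
 * names mirror the kernel ones but are invented for illustration.
 */
#include <stdio.h>
#include <string.h>

struct toy_obj {			/* plays the role of a vm_object */
	char	pages[4096];
	int	ref_count;
};

struct toy_node {			/* plays the role of a tmpfs_node */
	struct toy_obj *tn_aobj;	/* backing store of the file data */
};

struct toy_vnode {			/* plays the role of a vnode */
	struct toy_obj *v_object;	/* object behind the vnode's pages */
};

int
main(void)
{
	struct toy_obj obj = { .ref_count = 1 };
	struct toy_node node = { .tn_aobj = &obj };
	struct toy_vnode vp = { .v_object = NULL };

	/* Roughly what tmpfs_alloc_vp() now does: reuse the backing object. */
	vp.v_object = node.tn_aobj;
	obj.ref_count++;

	/*
	 * A store through the vnode's object is the file data itself;
	 * with two objects the bytes had to be copied between them.
	 */
	memcpy(vp.v_object->pages, "hello", 5);
	printf("%.5s\n", node.tn_aobj->pages);	/* prints "hello" */
	return (0);
}

In the real code the shared object is additionally tagged OBJ_TMPFS and gains
a swp_tmpfs back-pointer to the vnode, as the hunks below show.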

View File

@@ -166,6 +166,7 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
char *target, dev_t rdev, struct tmpfs_node **node)
{
struct tmpfs_node *nnode;
vm_object_t obj;
/* If the root directory of the 'tmp' file system is not yet
* allocated, this must be the request to do it. */
@@ -227,9 +228,14 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
break;
case VREG:
nnode->tn_reg.tn_aobj =
obj = nnode->tn_reg.tn_aobj =
vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
NULL /* XXXKIB - tmpfs needs swap reservation */);
VM_OBJECT_WLOCK(obj);
/* OBJ_TMPFS is set together with the setting of vp->v_object */
vm_object_set_flag(obj, OBJ_NOSPLIT);
vm_object_clear_flag(obj, OBJ_ONEMAPPING);
VM_OBJECT_WUNLOCK(obj);
break;
default:
@@ -434,9 +440,11 @@ int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
struct vnode **vpp)
{
int error = 0;
struct vnode *vp;
vm_object_t object;
int error;
error = 0;
loop:
TMPFS_NODE_LOCK(node);
if ((vp = node->tn_vnode) != NULL) {
@@ -506,13 +514,22 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
/* FALLTHROUGH */
case VLNK:
/* FALLTHROUGH */
case VREG:
/* FALLTHROUGH */
case VSOCK:
break;
case VFIFO:
vp->v_op = &tmpfs_fifoop_entries;
break;
case VREG:
object = node->tn_reg.tn_aobj;
VM_OBJECT_WLOCK(object);
VI_LOCK(vp);
KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
vp->v_object = object;
object->un_pager.swp.swp_tmpfs = vp;
vm_object_set_flag(object, OBJ_TMPFS);
VI_UNLOCK(vp);
VM_OBJECT_WUNLOCK(object);
break;
case VDIR:
MPASS(node->tn_dir.tn_parent != NULL);
if (node->tn_dir.tn_parent == node)
@@ -523,7 +540,6 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
}
vnode_pager_setsize(vp, node->tn_size);
error = insmntque(vp, mp);
if (error)
vp = NULL;
@@ -1343,7 +1359,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
TMPFS_UNLOCK(tmp);
node->tn_size = newsize;
vnode_pager_setsize(vp, newsize);
return (0);
}

View File

@@ -278,8 +278,6 @@ tmpfs_close(struct vop_close_args *v)
{
struct vnode *vp = v->a_vp;
MPASS(VOP_ISLOCKED(vp));
/* Update node times. */
tmpfs_update(vp);
@@ -439,7 +437,6 @@ tmpfs_setattr(struct vop_setattr_args *v)
return error;
}
/* --------------------------------------------------------------------- */
static int
tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
vm_offset_t offset, size_t tlen, struct uio *uio)
@@ -448,12 +445,35 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
int error, rv;
VM_OBJECT_WLOCK(tobj);
m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
/*
* The kern_sendfile() code calls vn_rdwr() with the page
* soft-busied. Ignore the soft-busy state here. Parallel
* reads of the page content from disk are prevented by
* VPO_BUSY.
*
* Although the tmpfs vnode lock is held here, it is
* nonetheless safe to sleep waiting for a free page. The
* pageout daemon does not need to acquire the tmpfs vnode
lock to page out tobj's pages because tobj is an OBJT_SWAP
* type object.
*/
m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
VM_ALLOC_IGN_SBUSY);
if (m->valid != VM_PAGE_BITS_ALL) {
if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
rv = vm_pager_get_pages(tobj, &m, 1, 0);
m = vm_page_lookup(tobj, idx);
if (m == NULL) {
printf(
"tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
tobj, idx, rv);
return (EIO);
}
if (rv != VM_PAGER_OK) {
printf(
"tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
tobj, idx, m->valid, rv);
vm_page_lock(m);
vm_page_free(m);
vm_page_unlock(m);
@@ -463,127 +483,38 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
} else
vm_page_zero_invalid(m, TRUE);
}
vm_page_lock(m);
vm_page_hold(m);
vm_page_wakeup(m);
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(tobj);
error = uiomove_fromphys(&m, offset, tlen, uio);
VM_OBJECT_WLOCK(tobj);
vm_page_lock(m);
vm_page_unwire(m, TRUE);
vm_page_unhold(m);
vm_page_deactivate(m);
/* Requeue to maintain LRU ordering. */
vm_page_requeue(m);
vm_page_unlock(m);
vm_page_wakeup(m);
VM_OBJECT_WUNLOCK(tobj);
return (error);
}
static __inline int
tmpfs_nocacheread_buf(vm_object_t tobj, vm_pindex_t idx,
vm_offset_t offset, size_t tlen, void *buf)
{
struct uio uio;
struct iovec iov;
uio.uio_iovcnt = 1;
uio.uio_iov = &iov;
iov.iov_base = buf;
iov.iov_len = tlen;
uio.uio_offset = 0;
uio.uio_resid = tlen;
uio.uio_rw = UIO_READ;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_td = curthread;
return (tmpfs_nocacheread(tobj, idx, offset, tlen, &uio));
}
static int
tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
{
struct sf_buf *sf;
vm_pindex_t idx;
vm_page_t m;
vm_offset_t offset;
off_t addr;
size_t tlen;
char *ma;
int error;
addr = uio->uio_offset;
idx = OFF_TO_IDX(addr);
offset = addr & PAGE_MASK;
tlen = MIN(PAGE_SIZE - offset, len);
VM_OBJECT_WLOCK(vobj);
lookupvpg:
if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
vm_page_is_valid(m, offset, tlen)) {
if ((m->oflags & VPO_BUSY) != 0) {
/*
* Reference the page before unlocking and sleeping so
* that the page daemon is less likely to reclaim it.
*/
vm_page_reference(m);
vm_page_sleep(m, "tmfsmr");
goto lookupvpg;
}
vm_page_busy(m);
VM_OBJECT_WUNLOCK(vobj);
error = uiomove_fromphys(&m, offset, tlen, uio);
VM_OBJECT_WLOCK(vobj);
vm_page_wakeup(m);
VM_OBJECT_WUNLOCK(vobj);
return (error);
} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
KASSERT(offset == 0,
("unexpected offset in tmpfs_mappedread for sendfile"));
if ((m->oflags & VPO_BUSY) != 0) {
/*
* Reference the page before unlocking and sleeping so
* that the page daemon is less likely to reclaim it.
*/
vm_page_reference(m);
vm_page_sleep(m, "tmfsmr");
goto lookupvpg;
}
vm_page_busy(m);
VM_OBJECT_WUNLOCK(vobj);
sched_pin();
sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
ma = (char *)sf_buf_kva(sf);
error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
if (error == 0) {
if (tlen != PAGE_SIZE)
bzero(ma + tlen, PAGE_SIZE - tlen);
uio->uio_offset += tlen;
uio->uio_resid -= tlen;
}
sf_buf_free(sf);
sched_unpin();
VM_OBJECT_WLOCK(vobj);
if (error == 0)
m->valid = VM_PAGE_BITS_ALL;
vm_page_wakeup(m);
VM_OBJECT_WUNLOCK(vobj);
return (error);
}
VM_OBJECT_WUNLOCK(vobj);
error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);
return (error);
}
static int
tmpfs_read(struct vop_read_args *v)
{
struct vnode *vp = v->a_vp;
struct uio *uio = v->a_uio;
struct tmpfs_node *node;
vm_object_t uobj;
size_t len;
int resid;
int error = 0;
vm_pindex_t idx;
vm_offset_t offset;
off_t addr;
size_t tlen;
node = VP_TO_TMPFS_NODE(vp);
@@ -607,7 +538,11 @@ tmpfs_read(struct vop_read_args *v)
len = MIN(node->tn_size - uio->uio_offset, resid);
if (len == 0)
break;
error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
addr = uio->uio_offset;
idx = OFF_TO_IDX(addr);
offset = addr & PAGE_MASK;
tlen = MIN(PAGE_SIZE - offset, len);
error = tmpfs_nocacheread(uobj, idx, offset, tlen, uio);
if ((error != 0) || (resid == uio->uio_resid))
break;
}
@@ -620,10 +555,10 @@ tmpfs_read(struct vop_read_args *v)
/* --------------------------------------------------------------------- */
static int
tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
{
vm_pindex_t idx;
vm_page_t vpg, tpg;
vm_page_t tpg;
vm_offset_t offset;
off_t addr;
size_t tlen;
@@ -636,69 +571,47 @@ tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *ui
offset = addr & PAGE_MASK;
tlen = MIN(PAGE_SIZE - offset, len);
VM_OBJECT_WLOCK(vobj);
lookupvpg:
if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
vm_page_is_valid(vpg, offset, tlen)) {
if ((vpg->oflags & VPO_BUSY) != 0) {
/*
* Reference the page before unlocking and sleeping so
* that the page daemon is less likely to reclaim it.
*/
vm_page_reference(vpg);
vm_page_sleep(vpg, "tmfsmw");
goto lookupvpg;
}
vm_page_busy(vpg);
vm_page_undirty(vpg);
VM_OBJECT_WUNLOCK(vobj);
error = uiomove_fromphys(&vpg, offset, tlen, uio);
} else {
if (vm_page_is_cached(vobj, idx))
vm_page_cache_free(vobj, idx, idx + 1);
VM_OBJECT_WUNLOCK(vobj);
vpg = NULL;
}
VM_OBJECT_WLOCK(tobj);
tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (tpg->valid != VM_PAGE_BITS_ALL) {
if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
tpg = vm_page_lookup(tobj, idx);
if (tpg == NULL) {
printf(
"tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
tobj, idx, rv);
return (EIO);
}
if (rv != VM_PAGER_OK) {
printf(
"tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
tobj, idx, tpg->valid, rv);
vm_page_lock(tpg);
vm_page_free(tpg);
vm_page_unlock(tpg);
error = EIO;
goto out;
VM_OBJECT_WUNLOCK(tobj);
return (EIO);
}
} else
vm_page_zero_invalid(tpg, TRUE);
}
VM_OBJECT_WUNLOCK(tobj);
if (vpg == NULL)
error = uiomove_fromphys(&tpg, offset, tlen, uio);
else {
KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
pmap_copy_page(vpg, tpg);
}
VM_OBJECT_WLOCK(tobj);
if (error == 0) {
KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
("parts of tpg invalid"));
vm_page_dirty(tpg);
}
vm_page_lock(tpg);
vm_page_unwire(tpg, TRUE);
vm_page_unlock(tpg);
vm_page_hold(tpg);
vm_page_wakeup(tpg);
out:
vm_page_unlock(tpg);
VM_OBJECT_WUNLOCK(tobj);
error = uiomove_fromphys(&tpg, offset, tlen, uio);
VM_OBJECT_WLOCK(tobj);
if (error == 0)
vm_page_dirty(tpg);
vm_page_lock(tpg);
vm_page_unhold(tpg);
vm_page_deactivate(tpg);
/* Requeue to maintain LRU ordering. */
vm_page_requeue(tpg);
vm_page_unlock(tpg);
VM_OBJECT_WUNLOCK(tobj);
if (vpg != NULL) {
VM_OBJECT_WLOCK(vobj);
vm_page_wakeup(vpg);
VM_OBJECT_WUNLOCK(vobj);
}
return (error);
}
@@ -756,7 +669,7 @@ tmpfs_write(struct vop_write_args *v)
len = MIN(node->tn_size - uio->uio_offset, resid);
if (len == 0)
break;
error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
error = tmpfs_mappedwrite(uobj, len, uio);
if ((error != 0) || (resid == uio->uio_resid))
break;
}
@@ -1536,8 +1449,6 @@ tmpfs_inactive(struct vop_inactive_args *v)
struct tmpfs_node *node;
MPASS(VOP_ISLOCKED(vp));
node = VP_TO_TMPFS_NODE(vp);
if (node->tn_links == 0)
@@ -1555,11 +1466,24 @@ tmpfs_reclaim(struct vop_reclaim_args *v)
struct tmpfs_mount *tmp;
struct tmpfs_node *node;
vm_object_t obj;
node = VP_TO_TMPFS_NODE(vp);
tmp = VFS_TO_TMPFS(vp->v_mount);
vnode_destroy_vobject(vp);
if (node->tn_type == VREG) {
obj = node->tn_reg.tn_aobj;
if (obj != NULL) {
/* Instead of vnode_destroy_vobject() */
VM_OBJECT_WLOCK(obj);
VI_LOCK(vp);
vm_object_clear_flag(obj, OBJ_TMPFS);
obj->un_pager.swp.swp_tmpfs = NULL;
VI_UNLOCK(vp);
VM_OBJECT_WUNLOCK(obj);
}
}
vp->v_object = NULL;
cache_purge(vp);
TMPFS_NODE_LOCK(node);

View File

@@ -505,6 +505,7 @@ void
vm_object_deallocate(vm_object_t object)
{
vm_object_t temp;
struct vnode *vp;
while (object != NULL) {
VM_OBJECT_WLOCK(object);
@@ -527,15 +528,36 @@ vm_object_deallocate(vm_object_t object)
VM_OBJECT_WUNLOCK(object);
return;
} else if (object->ref_count == 1) {
if (object->type == OBJT_SWAP &&
(object->flags & OBJ_TMPFS) != 0) {
vp = object->un_pager.swp.swp_tmpfs;
vhold(vp);
VM_OBJECT_WUNLOCK(object);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vdrop(vp);
VM_OBJECT_WLOCK(object);
if (object->type == OBJT_DEAD) {
VM_OBJECT_WUNLOCK(object);
VOP_UNLOCK(vp, 0);
return;
} else if ((object->flags & OBJ_TMPFS) != 0) {
if (object->ref_count == 1)
VOP_UNSET_TEXT(vp);
VOP_UNLOCK(vp, 0);
}
}
if (object->shadow_count == 0 &&
object->handle == NULL &&
(object->type == OBJT_DEFAULT ||
object->type == OBJT_SWAP)) {
(object->type == OBJT_SWAP &&
(object->flags & OBJ_TMPFS) == 0))) {
vm_object_set_flag(object, OBJ_ONEMAPPING);
} else if ((object->shadow_count == 1) &&
(object->handle == NULL) &&
(object->type == OBJT_DEFAULT ||
object->type == OBJT_SWAP)) {
KASSERT((object->flags & OBJ_TMPFS) == 0,
("Shadowed tmpfs v_object"));
vm_object_t robject;
robject = LIST_FIRST(&object->shadow_head);

View File

@@ -154,11 +154,21 @@ struct vm_object {
/*
* Swap pager
*
* swp_tmpfs - back-pointer to the tmpfs vnode,
* if any, which uses the vm object
* as backing store. The handle
* cannot be reused for linking,
* because the vnode can be
reclaimed and recreated, which
would change the handle and
invalidate the hash chain.
*
* swp_bcount - number of swap 'swblock' metablocks, each
* contains up to 16 swapblk assignments.
* see vm/swap_pager.h
*/
struct {
void *swp_tmpfs;
int swp_bcount;
} swp;
} un_pager;
@@ -179,6 +189,7 @@ struct vm_object {
#define OBJ_COLORED 0x1000 /* pg_color is defined */
#define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */
#define OBJ_DISCONNECTWNT 0x4000 /* disconnect from vnode wanted */
#define OBJ_TMPFS 0x8000
#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))
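
For reference, the page-chunking arithmetic that the reworked tmpfs_read() and
tmpfs_mappedwrite() perform with OFF_TO_IDX() and PAGE_MASK can be checked
with a small standalone program; the 4 KiB page size below is an assumption
for illustration, not taken from the kernel headers.

/*
 * Standalone sketch of the per-page chunking done by tmpfs_read():
 * split a (file offset, length) request into per-page pieces.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12			/* assumed: 4 KiB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(PAGE_SIZE - 1)
#define OFF_TO_IDX(off)	((uint64_t)(off) >> PAGE_SHIFT)

int
main(void)
{
	uint64_t addr = 10000;		/* uio_offset: file offset of the read */
	size_t len = 9000;		/* bytes the caller still wants */

	while (len > 0) {
		uint64_t idx = OFF_TO_IDX(addr);	/* page index in the object */
		size_t offset = addr & PAGE_MASK;	/* offset within that page */
		size_t tlen = PAGE_SIZE - offset;	/* copy at most to page end */
		if (tlen > len)
			tlen = len;
		printf("page %ju: copy %zu bytes at in-page offset %zu\n",
		    (uintmax_t)idx, tlen, offset);
		addr += tlen;
		len -= tlen;
	}
	return (0);
}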