2007-08-10 11:00:30 +00:00
|
|
|
/* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2008-09-03 18:53:48 +00:00
|
|
|
/*-
|
2017-11-27 15:15:37 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
|
|
|
|
*
|
2007-06-16 01:56:05 +00:00
|
|
|
* Copyright (c) 2005 The NetBSD Foundation, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Julio M. Merino Vidal, developed as part of Google's Summer of Code
|
|
|
|
* 2005 program.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Efficient memory file system supporting functions.
|
|
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
2018-11-23 22:24:59 +00:00
|
|
|
#include <sys/systm.h>
|
2017-12-06 00:42:08 +00:00
|
|
|
#include <sys/dirent.h>
|
2013-01-06 22:15:44 +00:00
|
|
|
#include <sys/fnv_hash.h>
|
2013-02-20 10:38:34 +00:00
|
|
|
#include <sys/lock.h>
|
2017-12-06 00:42:08 +00:00
|
|
|
#include <sys/limits.h>
|
|
|
|
#include <sys/mount.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
#include <sys/namei.h>
|
|
|
|
#include <sys/priv.h>
|
|
|
|
#include <sys/proc.h>
|
Huge cleanup of random(4) code.
* GENERAL
- Update copyright.
- Make kernel options for RANDOM_YARROW and RANDOM_DUMMY. Set
neither to ON, which means we want Fortuna
- If there is no 'device random' in the kernel, there will be NO
random(4) device in the kernel, and the KERN_ARND sysctl will
return nothing. With RANDOM_DUMMY there will be a random(4) that
always blocks.
- Repair kern.arandom (KERN_ARND sysctl). The old version went
through arc4random(9) and was a bit weird.
- Adjust arc4random stirring a bit - the existing code looks a little
suspect.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Redo read_random(9) so as to duplicate random(4)'s read internals.
This makes it a first-class citizen rather than a hack.
- Move stuff out of locked regions when it does not need to be
there.
- Trim RANDOM_DEBUG printfs. Some are excess to requirement, some
behind boot verbose.
- Use SYSINIT to sequence the startup.
- Fix init/deinit sysctl stuff.
- Make relevant sysctls also tunables.
- Add different harvesting "styles" to allow for different requirements
(direct, queue, fast).
- Add harvesting of FFS atime events. This needs to be checked for
weighing down the FS code.
- Add harvesting of slab allocator events. This needs to be checked for
weighing down the allocator code.
- Fix the random(9) manpage.
- Loadable modules are not present for now. These will be re-engineered
when the dust settles.
- Use macros for locks.
- Fix comments.
* src/share/man/...
- Update the man pages.
* src/etc/...
- The startup/shutdown work is done in D2924.
* src/UPDATING
- Add UPDATING announcement.
* src/sys/dev/random/build.sh
- Add copyright.
- Add libz for unit tests.
* src/sys/dev/random/dummy.c
- Remove; no longer needed. Functionality incorporated into randomdev.*.
* live_entropy_sources.c live_entropy_sources.h
- Remove; content moved.
- move content to randomdev.[ch] and optimise.
* src/sys/dev/random/random_adaptors.c src/sys/dev/random/random_adaptors.h
- Remove; plugability is no longer used. Compile-time algorithm
selection is the way to go.
* src/sys/dev/random/random_harvestq.c src/sys/dev/random/random_harvestq.h
- Add early (re)boot-time randomness caching.
* src/sys/dev/random/randomdev_soft.c src/sys/dev/random/randomdev_soft.h
- Remove; no longer needed.
* src/sys/dev/random/uint128.h
- Provide a fake uint128_t; if a real one ever arrived, we can use
that instead. All that is needed here is N=0, N++, N==0, and some
localised trickery is used to manufacture a 128-bit 0ULLL.
* src/sys/dev/random/unit_test.c src/sys/dev/random/unit_test.h
- Improve unit tests; previously the testing human needed clairvoyance;
now the test will do a basic check of compressibility. Clairvoyant
talent is still a good idea.
- This is still a long way off a proper unit test.
* src/sys/dev/random/fortuna.c src/sys/dev/random/fortuna.h
- Improve messy union to just uint128_t.
- Remove unneeded 'static struct fortuna_start_cache'.
- Tighten up up arithmetic.
- Provide a method to allow eternal junk to be introduced; harden
it against blatant by compress/hashing.
- Assert that locks are held correctly.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Turn into self-sufficient module (no longer requires randomdev_soft.[ch])
* src/sys/dev/random/yarrow.c src/sys/dev/random/yarrow.h
- Improve messy union to just uint128_t.
- Remove unneeded 'staic struct start_cache'.
- Tighten up up arithmetic.
- Provide a method to allow eternal junk to be introduced; harden
it against blatant by compress/hashing.
- Assert that locks are held correctly.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Turn into self-sufficient module (no longer requires randomdev_soft.[ch])
- Fix some magic numbers elsewhere used as FAST and SLOW.
Differential Revision: https://reviews.freebsd.org/D2025
Reviewed by: vsevolod,delphij,rwatson,trasz,jmg
Approved by: so (delphij)
2015-06-30 17:00:45 +00:00
|
|
|
#include <sys/random.h>
|
2013-02-20 10:38:34 +00:00
|
|
|
#include <sys/rwlock.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
#include <sys/stat.h>
|
2012-03-14 09:15:50 +00:00
|
|
|
#include <sys/sysctl.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
#include <sys/vnode.h>
|
|
|
|
#include <sys/vmmeter.h>
|
|
|
|
|
|
|
|
#include <vm/vm.h>
|
2012-08-05 14:11:42 +00:00
|
|
|
#include <vm/vm_param.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
#include <vm/vm_object.h>
|
|
|
|
#include <vm/vm_page.h>
|
Correct an error of omission in the implementation of the truncation
operation on POSIX shared memory objects and tmpfs. Previously, neither of
these modules correctly handled the case in which the new size of the object
or file was not a multiple of the page size. Specifically, they did not
handle partial page truncation of data stored on swap. As a result, stale
data might later be returned to an application.
Interestingly, a data inconsistency was less likely to occur under tmpfs
than POSIX shared memory objects. The reason being that a different mistake
by the tmpfs truncation operation helped avoid a data inconsistency. If the
data was still resident in memory in a PG_CACHED page, then the tmpfs
truncation operation would reactivate that page, zero the truncated portion,
and leave the page pinned in memory. More precisely, the benevolent error
was that the truncation operation didn't add the reactivated page to any of
the paging queues, effectively pinning the page. This page would remain
pinned until the file was destroyed or the page was read or written. With
this change, the page is now added to the inactive queue.
Discussed with: jhb
Reviewed by: kib (an earlier version)
MFC after: 3 weeks
2012-01-08 20:09:26 +00:00
|
|
|
#include <vm/vm_pageout.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
#include <vm/vm_pager.h>
|
|
|
|
#include <vm/vm_extern.h>
|
2017-12-06 00:42:08 +00:00
|
|
|
#include <vm/swap_pager.h>
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
#include <fs/tmpfs/tmpfs.h>
|
|
|
|
#include <fs/tmpfs/tmpfs_fifoops.h>
|
|
|
|
#include <fs/tmpfs/tmpfs_vnops.h>
|
|
|
|
|
2012-03-14 09:15:50 +00:00
|
|
|
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");
|
|
|
|
|
2012-04-07 15:23:51 +00:00
|
|
|
static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
|
|
|
|
|
2019-12-05 00:03:17 +00:00
|
|
|
static uma_zone_t tmpfs_dirent_pool;
|
|
|
|
static uma_zone_t tmpfs_node_pool;
|
|
|
|
|
|
|
|
static int
|
|
|
|
tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
|
|
|
|
{
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
|
|
|
node = mem;
|
|
|
|
node->tn_gen++;
|
|
|
|
node->tn_size = 0;
|
|
|
|
node->tn_status = 0;
|
|
|
|
node->tn_flags = 0;
|
|
|
|
node->tn_links = 0;
|
|
|
|
node->tn_vnode = NULL;
|
|
|
|
node->tn_vpstate = 0;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tmpfs_node_dtor(void *mem, int size, void *arg)
|
|
|
|
{
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
|
|
|
node = mem;
|
|
|
|
node->tn_type = VNON;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
tmpfs_node_init(void *mem, int size, int flags)
|
|
|
|
{
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
|
|
|
node = mem;
|
|
|
|
node->tn_id = 0;
|
|
|
|
mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF);
|
|
|
|
node->tn_gen = arc4random();
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tmpfs_node_fini(void *mem, int size)
|
|
|
|
{
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
|
|
|
node = mem;
|
|
|
|
mtx_destroy(&node->tn_interlock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Module initialization: create the UMA zones backing directory
 * entries and nodes.  The node zone wires the per-allocation
 * ctor/dtor pair and the one-time init/fini pair (mutex lifecycle).
 */
void
tmpfs_subr_init(void)
{
	tmpfs_dirent_pool = uma_zcreate("TMPFS dirent",
	    sizeof(struct tmpfs_dirent), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	tmpfs_node_pool = uma_zcreate("TMPFS node",
	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
}
|
|
|
|
|
|
|
|
/*
 * Module teardown: destroy the UMA zones created by tmpfs_subr_init(),
 * in reverse creation order.
 */
void
tmpfs_subr_uninit(void)
{
	uma_zdestroy(tmpfs_node_pool);
	uma_zdestroy(tmpfs_dirent_pool);
}
|
|
|
|
|
2012-04-07 15:23:51 +00:00
|
|
|
static int
|
|
|
|
sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
long pages, bytes;
|
|
|
|
|
|
|
|
pages = *(long *)arg1;
|
|
|
|
bytes = pages * PAGE_SIZE;
|
|
|
|
|
|
|
|
error = sysctl_handle_long(oidp, &bytes, 0, req);
|
|
|
|
if (error || !req->newptr)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
pages = bytes / PAGE_SIZE;
|
|
|
|
if (pages < TMPFS_PAGES_MINRESERVED)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
*(long *)arg1 = pages;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_RW,
|
2012-04-15 21:59:28 +00:00
|
|
|
&tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L",
|
|
|
|
"Amount of available memory and swap below which tmpfs growth stops");
|
2012-04-07 15:23:51 +00:00
|
|
|
|
2013-01-06 22:15:44 +00:00
|
|
|
static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
|
|
|
|
struct tmpfs_dirent *b);
|
|
|
|
RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
|
|
|
|
|
2012-04-07 15:23:51 +00:00
|
|
|
size_t
|
|
|
|
tmpfs_mem_avail(void)
|
|
|
|
{
|
|
|
|
vm_ooffset_t avail;
|
|
|
|
|
2018-02-06 22:10:07 +00:00
|
|
|
avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
|
2012-04-07 15:23:51 +00:00
|
|
|
if (__predict_false(avail < 0))
|
|
|
|
avail = 0;
|
|
|
|
return (avail);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t
|
|
|
|
tmpfs_pages_used(struct tmpfs_mount *tmp)
|
|
|
|
{
|
|
|
|
const size_t node_size = sizeof(struct tmpfs_node) +
|
|
|
|
sizeof(struct tmpfs_dirent);
|
|
|
|
size_t meta_pages;
|
|
|
|
|
|
|
|
meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
|
|
|
|
PAGE_SIZE);
|
|
|
|
return (meta_pages + tmp->tm_pages_used);
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
|
|
|
|
{
|
|
|
|
if (tmpfs_mem_avail() < req_pages)
|
|
|
|
return (0);
|
|
|
|
|
2017-01-14 06:20:36 +00:00
|
|
|
if (tmp->tm_pages_max != ULONG_MAX &&
|
2012-04-07 15:23:51 +00:00
|
|
|
tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2017-01-19 19:15:21 +00:00
|
|
|
/*
 * Acquire a reference on 'node'.  Convenience wrapper that takes and
 * releases the node interlock around the locked-entry variant.
 */
void
tmpfs_ref_node(struct tmpfs_node *node)
{

	TMPFS_NODE_LOCK(node);
	tmpfs_ref_node_locked(node);
	TMPFS_NODE_UNLOCK(node);
}
|
|
|
|
|
|
|
|
/*
 * Acquire a reference on 'node'; the node interlock must be held by
 * the caller.  The count must already be positive (a zero count means
 * the node is being torn down) and must not be about to overflow.
 */
void
tmpfs_ref_node_locked(struct tmpfs_node *node)
{

	TMPFS_NODE_ASSERT_LOCKED(node);
	KASSERT(node->tn_refcount > 0, ("node %p zero refcount", node));
	KASSERT(node->tn_refcount < UINT_MAX, ("node %p refcount %u", node,
	    node->tn_refcount));
	node->tn_refcount++;
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
|
|
|
|
* Allocates a new node of type 'type' inside the 'tmp' mount point, with
|
|
|
|
* its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
|
|
|
|
* using the credentials of the process 'p'.
|
|
|
|
*
|
|
|
|
* If the node type is set to 'VDIR', then the parent parameter must point
|
|
|
|
* to the parent directory of the node being created. It may only be NULL
|
|
|
|
* while allocating the root node.
|
|
|
|
*
|
|
|
|
* If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
|
|
|
|
* specifies the device the node represents.
|
|
|
|
*
|
|
|
|
* If the node type is set to 'VLNK', then the parameter target specifies
|
|
|
|
* the file name of the target file for the symbolic link that is being
|
|
|
|
* created.
|
|
|
|
*
|
|
|
|
* Note that new nodes are retrieved from the available list if it has
|
|
|
|
* items or, if it is empty, from the node pool as long as there is enough
|
|
|
|
* space to create them.
|
|
|
|
*
|
|
|
|
* Returns zero on success or an appropriate error code on failure.
|
|
|
|
*/
|
|
|
|
int
|
2014-07-14 09:52:33 +00:00
|
|
|
tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type,
|
2007-06-16 01:56:05 +00:00
|
|
|
uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
|
2018-11-02 14:42:36 +00:00
|
|
|
const char *target, dev_t rdev, struct tmpfs_node **node)
|
2007-06-16 01:56:05 +00:00
|
|
|
{
|
|
|
|
struct tmpfs_node *nnode;
|
2013-04-28 19:38:59 +00:00
|
|
|
vm_object_t obj;
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
/* If the root directory of the 'tmp' file system is not yet
|
|
|
|
* allocated, this must be the request to do it. */
|
|
|
|
MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
|
|
|
|
|
|
|
|
MPASS(IFF(type == VLNK, target != NULL));
|
|
|
|
MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
|
|
|
|
|
2010-01-20 16:56:20 +00:00
|
|
|
if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
|
2007-06-25 18:46:13 +00:00
|
|
|
return (ENOSPC);
|
2012-04-07 15:23:51 +00:00
|
|
|
if (tmpfs_pages_check_avail(tmp, 1) == 0)
|
|
|
|
return (ENOSPC);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2014-07-14 09:52:33 +00:00
|
|
|
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
|
|
|
|
/*
|
|
|
|
* When a new tmpfs node is created for fully
|
|
|
|
* constructed mount point, there must be a parent
|
|
|
|
* node, which vnode is locked exclusively. As
|
|
|
|
* consequence, if the unmount is executing in
|
|
|
|
* parallel, vflush() cannot reclaim the parent vnode.
|
|
|
|
* Due to this, the check for MNTK_UNMOUNT flag is not
|
|
|
|
* racy: if we did not see MNTK_UNMOUNT flag, then tmp
|
|
|
|
* cannot be destroyed until node construction is
|
|
|
|
* finished and the parent vnode unlocked.
|
|
|
|
*
|
|
|
|
* Tmpfs does not need to instantiate new nodes during
|
|
|
|
* unmount.
|
|
|
|
*/
|
|
|
|
return (EBUSY);
|
|
|
|
}
|
2019-04-02 13:41:26 +00:00
|
|
|
if ((mp->mnt_kern_flag & MNT_RDONLY) != 0)
|
|
|
|
return (EROFS);
|
2014-07-14 09:52:33 +00:00
|
|
|
|
2019-12-05 00:03:17 +00:00
|
|
|
nnode = uma_zalloc_arg(tmpfs_node_pool, tmp, M_WAITOK);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
/* Generic initialization. */
|
|
|
|
nnode->tn_type = type;
|
2007-06-28 02:34:32 +00:00
|
|
|
vfs_timestamp(&nnode->tn_atime);
|
2007-06-16 01:56:05 +00:00
|
|
|
nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
|
|
|
|
nnode->tn_atime;
|
|
|
|
nnode->tn_uid = uid;
|
|
|
|
nnode->tn_gid = gid;
|
|
|
|
nnode->tn_mode = mode;
|
2018-11-20 15:14:30 +00:00
|
|
|
nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
|
2017-01-19 19:15:21 +00:00
|
|
|
nnode->tn_refcount = 1;
|
2007-06-28 02:39:31 +00:00
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/* Type-specific initialization. */
|
|
|
|
switch (nnode->tn_type) {
|
|
|
|
case VBLK:
|
|
|
|
case VCHR:
|
|
|
|
nnode->tn_rdev = rdev;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case VDIR:
|
2013-01-06 22:15:44 +00:00
|
|
|
RB_INIT(&nnode->tn_dir.tn_dirhead);
|
|
|
|
LIST_INIT(&nnode->tn_dir.tn_dupindex);
|
2007-11-18 04:52:40 +00:00
|
|
|
MPASS(parent != nnode);
|
|
|
|
MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
|
2007-06-16 01:56:05 +00:00
|
|
|
nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
|
|
|
|
nnode->tn_dir.tn_readdir_lastn = 0;
|
|
|
|
nnode->tn_dir.tn_readdir_lastp = NULL;
|
|
|
|
nnode->tn_links++;
|
2009-10-11 07:03:56 +00:00
|
|
|
TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
|
2007-06-16 01:56:05 +00:00
|
|
|
nnode->tn_dir.tn_parent->tn_links++;
|
2009-10-11 07:03:56 +00:00
|
|
|
TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
|
2007-06-16 01:56:05 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case VFIFO:
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case VSOCK:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case VLNK:
|
|
|
|
MPASS(strlen(target) < MAXPATHLEN);
|
|
|
|
nnode->tn_size = strlen(target);
|
2007-06-29 05:23:15 +00:00
|
|
|
nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME,
|
|
|
|
M_WAITOK);
|
2007-06-16 01:56:05 +00:00
|
|
|
memcpy(nnode->tn_link, target, nnode->tn_size);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case VREG:
|
2013-04-28 19:38:59 +00:00
|
|
|
obj = nnode->tn_reg.tn_aobj =
|
Implement global and per-uid accounting of the anonymous memory. Add
rlimit RLIMIT_SWAP that limits the amount of swap that may be reserved
for the uid.
The accounting information (charge) is associated with either map entry,
or vm object backing the entry, assuming the object is the first one
in the shadow chain and entry does not require COW. Charge is moved
from entry to object on allocation of the object, e.g. during the mmap,
assuming the object is allocated, or on the first page fault on the
entry. It moves back to the entry on forks due to COW setup.
The per-entry granularity of accounting makes the charge process fair
for processes that change uid during lifetime, and decrements charge
for proper uid when region is unmapped.
The interface of vm_pager_allocate(9) is extended by adding struct ucred *,
that is used to charge appropriate uid when allocation if performed by
kernel, e.g. md(4).
Several syscalls, among them is fork(2), may now return ENOMEM when
global or per-uid limits are enforced.
In collaboration with: pho
Reviewed by: alc
Approved by: re (kensmith)
2009-06-23 20:45:22 +00:00
|
|
|
vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
|
|
|
|
NULL /* XXXKIB - tmpfs needs swap reservation */);
|
2013-04-28 19:38:59 +00:00
|
|
|
VM_OBJECT_WLOCK(obj);
|
|
|
|
/* OBJ_TMPFS is set together with the setting of vp->v_object */
|
2019-11-19 23:19:43 +00:00
|
|
|
vm_object_set_flag(obj, OBJ_TMPFS_NODE);
|
2013-04-28 19:38:59 +00:00
|
|
|
VM_OBJECT_WUNLOCK(obj);
|
2007-06-16 01:56:05 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2017-01-19 14:27:37 +00:00
|
|
|
panic("tmpfs_alloc_node: type %p %d", nnode,
|
|
|
|
(int)nnode->tn_type);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TMPFS_LOCK(tmp);
|
|
|
|
LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
|
2017-01-19 19:15:21 +00:00
|
|
|
nnode->tn_attached = true;
|
2007-06-16 01:56:05 +00:00
|
|
|
tmp->tm_nodes_inuse++;
|
2017-01-19 19:15:21 +00:00
|
|
|
tmp->tm_refcount++;
|
2007-06-16 01:56:05 +00:00
|
|
|
TMPFS_UNLOCK(tmp);
|
|
|
|
|
|
|
|
*node = nnode;
|
2017-01-19 14:27:37 +00:00
|
|
|
return (0);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Destroys the node pointed to by node from the file system 'tmp'.
|
2017-01-19 14:27:37 +00:00
|
|
|
* If the node references a directory, no entries are allowed.
|
2007-06-16 01:56:05 +00:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
|
2017-01-19 19:15:21 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
TMPFS_LOCK(tmp);
|
|
|
|
TMPFS_NODE_LOCK(node);
|
|
|
|
if (!tmpfs_free_node_locked(tmp, node, false)) {
|
|
|
|
TMPFS_NODE_UNLOCK(node);
|
|
|
|
TMPFS_UNLOCK(tmp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Drop a reference on 'node' and, when it was the last one, destroy
 * the node.  Both the mount lock and the node interlock must be held
 * on entry; they are consumed (unlocked) only on the destruction path.
 *
 * 'detach' forces removal from the mount's in-use list even when other
 * references remain.
 *
 * Returns true when the node was destroyed (both locks dropped and the
 * mount reference released), false when references remain and the
 * caller must unlock.
 */
bool
tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    bool detach)
{
	vm_object_t uobj;

	TMPFS_MP_ASSERT_LOCKED(tmp);
	TMPFS_NODE_ASSERT_LOCKED(node);
	KASSERT(node->tn_refcount > 0, ("node %p refcount zero", node));

	node->tn_refcount--;
	/* Unhook from the in-use list on the last ref or forced detach. */
	if (node->tn_attached && (detach || node->tn_refcount == 0)) {
		MPASS(tmp->tm_nodes_inuse > 0);
		tmp->tm_nodes_inuse--;
		LIST_REMOVE(node, tn_entries);
		node->tn_attached = false;
	}
	if (node->tn_refcount > 0)
		return (false);

	/* Last reference: no vnode may still point at this node. */
#ifdef INVARIANTS
	MPASS(node->tn_vnode == NULL);
	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
#endif
	TMPFS_NODE_UNLOCK(node);
	TMPFS_UNLOCK(tmp);

	/* Release type-specific resources. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		free(node->tn_link, M_TMPFSNAME);
		break;

	case VREG:
		uobj = node->tn_reg.tn_aobj;
		if (uobj != NULL) {
			/* Return the file's pages to the mount's budget. */
			if (uobj->size != 0)
				atomic_subtract_long(&tmp->tm_pages_used, uobj->size);
			KASSERT((uobj->flags & OBJ_TMPFS) == 0,
			    ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj));
			vm_object_deallocate(uobj);
		}
		break;

	default:
		panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type);
	}

	uma_zfree(tmpfs_node_pool, node);
	/* Drop the mount reference taken when the node was allocated. */
	TMPFS_LOCK(tmp);
	tmpfs_free_tmp(tmp);
	return (true);
}
|
|
|
|
|
2013-01-06 22:15:44 +00:00
|
|
|
static __inline uint32_t
|
|
|
|
tmpfs_dirent_hash(const char *name, u_int len)
|
|
|
|
{
|
|
|
|
uint32_t hash;
|
|
|
|
|
|
|
|
hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
|
|
|
|
#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
|
|
|
|
hash &= 0xf;
|
|
|
|
#endif
|
|
|
|
if (hash < TMPFS_DIRCOOKIE_MIN)
|
|
|
|
hash += TMPFS_DIRCOOKIE_MIN;
|
|
|
|
|
|
|
|
return (hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline off_t
|
|
|
|
tmpfs_dirent_cookie(struct tmpfs_dirent *de)
|
|
|
|
{
|
2014-03-14 02:10:30 +00:00
|
|
|
if (de == NULL)
|
|
|
|
return (TMPFS_DIRCOOKIE_EOF);
|
|
|
|
|
2013-01-06 22:15:44 +00:00
|
|
|
MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
|
|
|
|
|
|
|
|
return (de->td_cookie);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* True when this entry's cookie carries the duplicate-hash marker. */
static __inline boolean_t
tmpfs_dirent_dup(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
}
|
|
|
|
|
|
|
|
/* True when this entry is the head of a duplicate-hash list. */
static __inline boolean_t
tmpfs_dirent_duphead(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
|
|
|
|
{
|
|
|
|
de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
|
|
|
|
memcpy(de->ud.td_name, name, namelen);
|
|
|
|
de->td_namelen = namelen;
|
|
|
|
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
|
|
|
|
* Allocates a new directory entry for the node node with a name of name.
|
|
|
|
* The new directory entry is returned in *de.
|
|
|
|
*
|
|
|
|
* The link count of node is increased by one to reflect the new object
|
|
|
|
* referencing it.
|
|
|
|
*
|
|
|
|
* Returns zero on success or an appropriate error code on failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
|
2013-01-06 22:15:44 +00:00
|
|
|
const char *name, u_int len, struct tmpfs_dirent **de)
|
2007-06-16 01:56:05 +00:00
|
|
|
{
|
|
|
|
struct tmpfs_dirent *nde;
|
|
|
|
|
2019-12-05 00:03:17 +00:00
|
|
|
nde = uma_zalloc(tmpfs_dirent_pool, M_WAITOK);
|
2007-06-16 01:56:05 +00:00
|
|
|
nde->td_node = node;
|
2013-01-06 22:15:44 +00:00
|
|
|
if (name != NULL) {
|
|
|
|
nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
|
|
|
|
tmpfs_dirent_init(nde, name, len);
|
|
|
|
} else
|
|
|
|
nde->td_namelen = 0;
|
2010-08-22 05:36:06 +00:00
|
|
|
if (node != NULL)
|
|
|
|
node->tn_links++;
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
*de = nde;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Frees a directory entry. It is the caller's responsibility to destroy
|
|
|
|
* the node referenced by it if needed.
|
|
|
|
*
|
|
|
|
* The link count of node is decreased by one to reflect the removal of an
|
|
|
|
* object that referenced it. This only happens if 'node_exists' is true;
|
|
|
|
* otherwise the function will not access the node referred to by the
|
|
|
|
* directory entry, as it may already have been released from the outside.
|
|
|
|
*/
|
|
|
|
void
|
2013-01-06 22:15:44 +00:00
|
|
|
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
|
2007-06-16 01:56:05 +00:00
|
|
|
{
|
2013-01-06 22:15:44 +00:00
|
|
|
struct tmpfs_node *node;
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2013-01-06 22:15:44 +00:00
|
|
|
node = de->td_node;
|
|
|
|
if (node != NULL) {
|
|
|
|
MPASS(node->tn_links > 0);
|
|
|
|
node->tn_links--;
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
2013-01-06 22:15:44 +00:00
|
|
|
if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
|
|
|
|
free(de->ud.td_name, M_TMPFSNAME);
|
2019-12-05 00:03:17 +00:00
|
|
|
uma_zfree(tmpfs_dirent_pool, de);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
2013-05-02 18:44:31 +00:00
|
|
|
/*
 * Detach swap object 'obj' from regular-file vnode 'vp' during vnode
 * reclamation: clear OBJ_TMPFS and the object's back-pointer so later
 * paging activity does not touch the dying vnode.  Both the object
 * write lock and the vnode interlock are held across the update.
 */
void
tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
{

	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
	if (vp->v_type != VREG || obj == NULL)
		return;

	VM_OBJECT_WLOCK(obj);
	VI_LOCK(vp);
	vm_object_clear_flag(obj, OBJ_TMPFS);
	obj->un_pager.swp.swp_tmpfs = NULL;
	/*
	 * NOTE(review): a negative v_writecount presumably reflects
	 * OBJ_TMPFS write-count accounting being undone here — confirm
	 * against the tmpfs vnops that adjust v_writecount.
	 */
	if (vp->v_writecount < 0)
		vp->v_writecount = 0;
	VI_UNLOCK(vp);
	VM_OBJECT_WUNLOCK(obj);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Need to clear v_object for insmntque failure.
|
|
|
|
*/
|
|
|
|
/*
 * Need to clear v_object for insmntque failure.  Undo the partial
 * vnode setup: detach the VM object and tmpfs private data, switch to
 * the dead vnodeops so no tmpfs operation can run on the vnode again,
 * then destroy and release it.
 */
static void
tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg)
{

	tmpfs_destroy_vobject(vp, vp->v_object);
	vp->v_object = NULL;
	vp->v_data = NULL;
	vp->v_op = &dead_vnodeops;
	vgone(vp);
	vput(vp);
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 *
 * Implementation notes: the function is a retry loop ('loop:') driven by
 * three races it must survive while the node lock is dropped:
 *   - an existing vnode being reclaimed (VN_IS_DOOMED),
 *   - vget() losing to a concurrent free (ENOENT / tn_vnode change),
 *   - another thread concurrently allocating the vnode
 *     (TMPFS_VNODE_ALLOCATING / TMPFS_VNODE_WANT handshake).
 * A reference on the node is held for the duration (tmpfs_ref_node_locked)
 * and released at 'out:' via tmpfs_free_node().
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
    struct vnode **vpp)
{
	struct vnode *vp;
	struct tmpfs_mount *tm;
	vm_object_t object;
	int error;

	error = 0;
	tm = VFS_TO_TMPFS(mp);
	TMPFS_NODE_LOCK(node);
	tmpfs_ref_node_locked(node);
loop:
	TMPFS_NODE_ASSERT_LOCKED(node);
	if ((vp = node->tn_vnode) != NULL) {
		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		VI_LOCK(vp);
		/*
		 * An unlinked directory, or a doomed vnode with a
		 * caller unwilling to sleep, cannot be resurrected.
		 */
		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
		    (VN_IS_DOOMED(vp) &&
		    (lkflag & LK_NOWAIT) != 0)) {
			VI_UNLOCK(vp);
			TMPFS_NODE_UNLOCK(node);
			error = ENOENT;
			vp = NULL;
			goto out;
		}
		if (VN_IS_DOOMED(vp)) {
			VI_UNLOCK(vp);
			/* Wait for the reclaim to finish, then retry. */
			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
				    0, "tmpfsE", 0);
			}
			goto loop;
		}
		TMPFS_NODE_UNLOCK(node);
		error = vget(vp, lkflag | LK_INTERLOCK, curthread);
		if (error == ENOENT) {
			/* Vnode was recycled under us; retry. */
			TMPFS_NODE_LOCK(node);
			goto loop;
		}
		if (error != 0) {
			vp = NULL;
			goto out;
		}

		/*
		 * Make sure the vnode is still there after
		 * getting the interlock to avoid racing a free.
		 */
		if (node->tn_vnode == NULL || node->tn_vnode != vp) {
			vput(vp);
			TMPFS_NODE_LOCK(node);
			goto loop;
		}

		goto out;
	}

	/* No vnode yet; refuse to create one for a dead node. */
	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		vp = NULL;
		goto out;
	}

	/*
	 * otherwise lock the vp list while we call getnewvnode
	 * since that can block.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		/* Another thread is allocating; wait to be woken. */
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = msleep((caddr_t) &node->tn_vpstate,
		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
		if (error != 0)
			goto out;
		goto loop;
	} else
		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;

	TMPFS_NODE_UNLOCK(node);

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
	if (error != 0)
		goto unlock;
	MPASS(vp != NULL);

	/* lkflag is ignored, the lock is exclusive */
	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VFIFO:
		vp->v_op = &tmpfs_fifoop_entries;
		break;
	case VREG:
		/* Wire the node's swap object up as the vnode's v_object. */
		object = node->tn_reg.tn_aobj;
		VM_OBJECT_WLOCK(object);
		VI_LOCK(vp);
		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
		vp->v_object = object;
		object->un_pager.swp.swp_tmpfs = vp;
		vm_object_set_flag(object, OBJ_TMPFS);
		VI_UNLOCK(vp);
		VM_OBJECT_WUNLOCK(object);
		break;
	case VDIR:
		MPASS(node->tn_dir.tn_parent != NULL);
		if (node->tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	/* On failure the destructor disposes of vp; return NULL to caller. */
	error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL);
	if (error != 0)
		vp = NULL;

unlock:
	TMPFS_NODE_LOCK(node);

	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	/* Wake any thread parked in the ALLOCATING handshake above. */
	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup((caddr_t) &node->tn_vpstate);
	} else
		TMPFS_NODE_UNLOCK(node);

out:
	if (error == 0) {
		*vpp = vp;

#ifdef INVARIANTS
		MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp));
		TMPFS_NODE_LOCK(node);
		MPASS(*vpp == node->tn_vnode);
		TMPFS_NODE_UNLOCK(node);
#endif
	}
	/* Drop the node reference taken at entry. */
	tmpfs_free_node(tm, node);

	return (error);
}
|
|
|
|
|
|
|
|
/*
 * Destroys the association between the vnode vp and the node it
 * references.
 *
 * Called with the node lock held (asserted).  If a thread is waiting in
 * tmpfs_alloc_vp() for this reclaim to finish (TMPFS_VNODE_WRECLAIM),
 * wake it before clearing the flag; the waiter sleeps on &tn_vnode.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_ASSERT_LOCKED(node);
	node->tn_vnode = NULL;
	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
		wakeup(&node->tn_vnode);
	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
	vp->v_data = NULL;
}
|
|
|
|
|
|
|
|
/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 *
 * 'target' is the symlink target string (used only for VLNK nodes).
 * Allocation order is node -> dirent -> vnode; on failure each step
 * rolls back the earlier allocations before returning.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, const char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");
	MPASS(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
		if (dnode->tn_links == TMPFS_LINK_MAX) {
			return (EMLINK);
		}

		parent = dnode;
		MPASS(parent != NULL);
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
	    target, vap->va_rdev, &node);
	if (error != 0)
		return (error);

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
	if (error != 0) {
		/* Roll back the dirent and node allocated above. */
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);
	return (0);
}
|
|
|
|
|
2017-01-19 18:38:58 +00:00
|
|
|
struct tmpfs_dirent *
|
2013-01-06 22:15:44 +00:00
|
|
|
tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
|
|
|
|
{
|
|
|
|
struct tmpfs_dirent *de;
|
|
|
|
|
|
|
|
de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
|
|
|
|
dc->tdc_tree = de;
|
|
|
|
if (de != NULL && tmpfs_dirent_duphead(de))
|
|
|
|
de = LIST_FIRST(&de->ud.td_duphead);
|
|
|
|
dc->tdc_current = de;
|
|
|
|
|
|
|
|
return (dc->tdc_current);
|
|
|
|
}
|
|
|
|
|
2017-01-19 18:38:58 +00:00
|
|
|
/*
 * Advance the directory cursor 'dc' to the next entry of 'dnode' and
 * return it, or NULL at end of directory.  Duplicate-hash chains are
 * walked entry by entry before stepping to the next RB-tree element;
 * a duphead reached in the tree is entered rather than returned.
 */
struct tmpfs_dirent *
tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	MPASS(dc->tdc_tree != NULL);
	if (tmpfs_dirent_dup(dc->tdc_current)) {
		/* Still inside a duplicate chain; try its next element. */
		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
		if (dc->tdc_current != NULL)
			return (dc->tdc_current);
	}
	/* Chain exhausted (or not a dup): step the RB-tree cursor. */
	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
		/* A duphead is never empty; descend into its list. */
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}

	return (dc->tdc_current);
}
|
|
|
|
|
|
|
|
/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
|
|
|
|
static struct tmpfs_dirent *
|
|
|
|
tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
|
|
|
|
{
|
|
|
|
struct tmpfs_dirent *de, dekey;
|
|
|
|
|
|
|
|
dekey.td_hash = hash;
|
|
|
|
de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
|
|
|
|
return (de);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lookup directory entry by cookie, initialize directory cursor accordingly. */
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
    struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
	struct tmpfs_dirent *de, dekey;

	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);

	/* Fast path: the per-directory last-readdir cache. */
	if (cookie == node->tn_dir.tn_readdir_lastn &&
	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
		/* Protect against possible race, tn_readdir_last[pn]
		 * may be updated with only shared vnode lock held. */
		if (cookie == tmpfs_dirent_cookie(de))
			goto out;
	}

	/* Duplicate-hash cookies live on the dupindex list, not the tree. */
	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
		    uh.td_dup.index_entries) {
			MPASS(tmpfs_dirent_dup(de));
			if (de->td_cookie == cookie)
				goto out;
			/* dupindex list is sorted. */
			if (de->td_cookie < cookie) {
				/* Passed the slot: entry was removed. */
				de = NULL;
				goto out;
			}
		}
		MPASS(de == NULL);
		goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
		/* Malformed cookie. */
		de = NULL;
	} else {
		dekey.td_hash = cookie;
		/* Recover if direntry for cookie was removed */
		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
	}
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}
	return (dc->tdc_current);

out:
	/*
	 * 'de' is either the exact entry or NULL.  When it is a dup entry,
	 * the tree cursor must point at the corresponding duphead so that
	 * tmpfs_dir_next() can continue the traversal.
	 */
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_dup(de))
		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
		    de->td_hash);
	return (dc->tdc_current);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Looks for a directory entry in the directory represented by node.
|
|
|
|
* 'cnp' describes the name of the entry to look for. Note that the .
|
|
|
|
* and .. components are not allowed as they do not physically exist
|
|
|
|
* within directories.
|
|
|
|
*
|
|
|
|
* Returns a pointer to the entry when found, otherwise NULL.
|
|
|
|
*/
|
|
|
|
struct tmpfs_dirent *
|
|
|
|
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
|
|
|
|
struct componentname *cnp)
|
|
|
|
{
|
|
|
|
struct tmpfs_dir_duphead *duphead;
|
|
|
|
struct tmpfs_dirent *de;
|
|
|
|
uint32_t hash;
|
|
|
|
|
|
|
|
MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
|
|
|
|
MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
|
|
|
|
cnp->cn_nameptr[1] == '.')));
|
|
|
|
TMPFS_VALIDATE_DIR(node);
|
|
|
|
|
|
|
|
hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
|
|
|
|
de = tmpfs_dir_xlookup_hash(node, hash);
|
|
|
|
if (de != NULL && tmpfs_dirent_duphead(de)) {
|
|
|
|
duphead = &de->ud.td_duphead;
|
|
|
|
LIST_FOREACH(de, duphead, uh.td_dup.entries) {
|
|
|
|
if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
|
|
|
|
cnp->cn_namelen))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else if (de != NULL) {
|
|
|
|
if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
|
|
|
|
cnp->cn_namelen))
|
|
|
|
de = NULL;
|
|
|
|
}
|
|
|
|
if (de != NULL && f != NULL && de->td_node != f)
|
|
|
|
de = NULL;
|
|
|
|
|
|
|
|
return (de);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
 * list, allocate new cookie value.
 *
 * The dupindex list is kept sorted in descending cookie order; the common
 * case simply assigns (head cookie + 1).  Near exhaustion the list is
 * scanned for a hole between consecutive cookies.
 */
static void
tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
    struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
{
	struct tmpfs_dir_duphead *dupindex;
	struct tmpfs_dirent *de, *pde;

	dupindex = &dnode->tn_dir.tn_dupindex;
	de = LIST_FIRST(dupindex);
	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
		/* Fast path: room above the current maximum cookie. */
		if (de == NULL)
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
		else
			nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}

	/*
	 * Cookie numbers are near exhaustion. Scan dupindex list for unused
	 * numbers. dupindex list is sorted in descending order. Keep it so
	 * after inserting nde.
	 */
	while (1) {
		pde = de;
		de = LIST_NEXT(de, uh.td_dup.index_entries);
		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
			/*
			 * Last element of the index doesn't have minimal cookie
			 * value, use it.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		} else if (de == NULL) {
			/*
			 * We are so lucky have 2^30 hash duplicates in single
			 * directory :) Return largest possible cookie value.
			 * It should be fine except possible issues with
			 * VOP_READDIR restart.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
			LIST_INSERT_HEAD(dupindex, nde,
			    uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		}
		if (de->td_cookie + 1 == pde->td_cookie ||
		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
			continue;	/* No hole or invalid cookie. */
		/* Found a gap: take the cookie just above 'de'. */
		nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		MPASS(pde->td_cookie > nde->td_cookie);
		MPASS(nde->td_cookie > de->td_cookie);
		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
 * Attaches the directory entry de to the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 *
 * On a hash collision the colliding tree entry is converted into (or
 * already is) a duphead, and both entries move onto its duplicate list.
 * The readdir position cache is invalidated because cookies may change.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde, *nde;

	ASSERT_VOP_ELOCKED(vp, __func__);
	MPASS(de->td_namelen > 0);
	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
	MPASS(de->td_cookie == de->td_hash);

	dnode = VP_TO_TMPFS_DIR(vp);
	/* Invalidate the readdir cache; the directory is changing. */
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	MPASS(!tmpfs_dirent_dup(de));
	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
	if (xde != NULL && tmpfs_dirent_duphead(xde))
		/* Existing collision chain: append to it. */
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	else if (xde != NULL) {
		/*
		 * Allocate new duphead. Swap xde with duphead to avoid
		 * adding/removing elements with the same hash.
		 */
		MPASS(!tmpfs_dirent_dup(xde));
		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
		    &nde);
		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
		memcpy(nde, xde, sizeof(*xde));
		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
		LIST_INIT(&xde->ud.td_duphead);
		xde->td_namelen = 0;
		xde->td_node = NULL;
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	}
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	tmpfs_update(vp);
}
|
|
|
|
|
|
|
|
/*
 * Detaches the directory entry de from the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_free_dirent.
 *
 * When de was the last member of a duplicate-hash chain, the now-empty
 * duphead is removed from the tree and freed as well, and de's cookie
 * is reset to its hash so it can be reinserted as a plain entry.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_dir *head;
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde;

	ASSERT_VOP_ELOCKED(vp, __func__);

	dnode = VP_TO_TMPFS_DIR(vp);
	head = &dnode->tn_dir.tn_dirhead;
	/* Invalidate the readdir cache; the directory is changing. */
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	if (tmpfs_dirent_dup(de)) {
		/* Remove duphead if de was last entry. */
		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
			MPASS(tmpfs_dirent_duphead(xde));
		} else
			xde = NULL;
		LIST_REMOVE(de, uh.td_dup.entries);
		LIST_REMOVE(de, uh.td_dup.index_entries);
		if (xde != NULL) {
			if (LIST_EMPTY(&xde->ud.td_duphead)) {
				RB_REMOVE(tmpfs_dir, head, xde);
				tmp = VFS_TO_TMPFS(vp->v_mount);
				MPASS(xde->td_node == NULL);
				tmpfs_free_dirent(tmp, xde);
			}
		}
		/* Restore the plain cookie == hash invariant. */
		de->td_cookie = de->td_hash;
	} else
		RB_REMOVE(tmpfs_dir, head, de);

	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	tmpfs_update(vp);
}
|
|
|
|
|
2013-01-06 22:15:44 +00:00
|
|
|
/*
 * Free every directory entry of 'dnode', including duplicate-hash chains
 * and their dupheads.  The referenced nodes are not freed here; td_node
 * is cleared so tmpfs_free_dirent() does not touch them.
 */
void
tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
{
	struct tmpfs_dirent *de, *dde, *nde;

	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
		/* Node may already be destroyed. */
		de->td_node = NULL;
		if (tmpfs_dirent_duphead(de)) {
			/* Drain the duplicate chain before freeing its head. */
			while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
				LIST_REMOVE(dde, uh.td_dup.entries);
				dde->td_node = NULL;
				tmpfs_free_dirent(tmp, dde);
			}
		}
		tmpfs_free_dirent(tmp, de);
	}
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Helper function for tmpfs_readdir. Creates a '.' entry for the given
|
|
|
|
* directory and returns it in the uio space. The function returns 0
|
|
|
|
* on success, -1 if there was not enough space in the uio structure to
|
|
|
|
* hold the directory entry or an appropriate error code if another
|
|
|
|
* error happens.
|
|
|
|
*/
|
2013-01-06 22:15:44 +00:00
|
|
|
static int
|
2019-04-02 13:49:32 +00:00
|
|
|
tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
|
|
|
|
struct uio *uio)
|
2007-06-16 01:56:05 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct dirent dent;
|
|
|
|
|
|
|
|
TMPFS_VALIDATE_DIR(node);
|
|
|
|
MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
|
|
|
|
|
|
|
|
dent.d_fileno = node->tn_id;
|
|
|
|
dent.d_type = DT_DIR;
|
|
|
|
dent.d_namlen = 1;
|
|
|
|
dent.d_name[0] = '.';
|
|
|
|
dent.d_reclen = GENERIC_DIRSIZ(&dent);
|
2018-11-23 22:24:59 +00:00
|
|
|
dirent_terminate(&dent);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
if (dent.d_reclen > uio->uio_resid)
|
2013-01-06 22:15:44 +00:00
|
|
|
error = EJUSTRETURN;
|
|
|
|
else
|
2007-06-16 01:56:05 +00:00
|
|
|
error = uiomove(&dent, dent.d_reclen, uio);
|
|
|
|
|
2019-04-02 13:49:32 +00:00
|
|
|
tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2017-01-06 17:43:36 +00:00
|
|
|
return (error);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, EJUSTRETURN if there was not enough space in the uio
 * structure to hold the directory entry or an appropriate error code if
 * another error happens.
 */
static int
tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
    struct uio *uio)
{
	struct tmpfs_node *parent;
	struct dirent dent;
	int error;

	TMPFS_VALIDATE_DIR(node);
	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	/*
	 * Return ENOENT if the current node is already removed.
	 */
	TMPFS_ASSERT_LOCKED(node);
	parent = node->tn_dir.tn_parent;
	if (parent == NULL)
		return (ENOENT);

	/* Lock the parent only long enough to read a stable tn_id. */
	TMPFS_NODE_LOCK(parent);
	dent.d_fileno = parent->tn_id;
	TMPFS_NODE_UNLOCK(parent);

	dent.d_type = DT_DIR;
	dent.d_namlen = 2;
	dent.d_name[0] = '.';
	dent.d_name[1] = '.';
	dent.d_reclen = GENERIC_DIRSIZ(&dent);
	dirent_terminate(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = EJUSTRETURN;
	else
		error = uiomove(&dent, dent.d_reclen, uio);

	tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED);

	return (error);
}
|
|
|
|
|
|
|
|
/*
 * Helper function for tmpfs_readdir.  Returns as much directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, EJUSTRETURN if there was not enough
 * space in the uio structure to hold the directory entry or an
 * appropriate error code if another error happens.
 *
 * When 'cookies' is non-NULL, one cookie per emitted entry is stored
 * there (at most 'maxcookies'), and *ncookies is advanced accordingly.
 */
int
tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node,
    struct uio *uio, int maxcookies, u_long *cookies, int *ncookies)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de;
	off_t off;
	int error;

	TMPFS_VALIDATE_DIR(node);

	off = 0;

	/*
	 * Lookup the node from the current offset.  The starting offset of
	 * 0 will lookup both '.' and '..', and then the first real entry,
	 * or EOF if there are none.  Then find all entries for the dir that
	 * fit into the buffer.  Once no more entries are found (de == NULL),
	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
	 * call to return 0.
	 */
	switch (uio->uio_offset) {
	case TMPFS_DIRCOOKIE_DOT:
		error = tmpfs_dir_getdotdent(tm, node, uio);
		if (error != 0)
			return (error);
		uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
		if (cookies != NULL)
			cookies[(*ncookies)++] = off = uio->uio_offset;
		/* FALLTHROUGH */
	case TMPFS_DIRCOOKIE_DOTDOT:
		error = tmpfs_dir_getdotdotdent(tm, node, uio);
		if (error != 0)
			return (error);
		de = tmpfs_dir_first(node, &dc);
		uio->uio_offset = tmpfs_dirent_cookie(de);
		if (cookies != NULL)
			cookies[(*ncookies)++] = off = uio->uio_offset;
		/* EOF. */
		if (de == NULL)
			return (0);
		break;
	case TMPFS_DIRCOOKIE_EOF:
		return (0);
	default:
		/* Resume a partially-read directory from its cookie. */
		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
		if (de == NULL)
			return (EINVAL);
		if (cookies != NULL)
			off = tmpfs_dirent_cookie(de);
	}

	/* Read as much entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		if (de->td_node == NULL) {
			/* Whiteout entry: no backing node. */
			d.d_fileno = 1;
			d.d_type = DT_WHT;
		} else {
			d.d_fileno = de->td_node->tn_id;
			switch (de->td_node->tn_type) {
			case VBLK:
				d.d_type = DT_BLK;
				break;

			case VCHR:
				d.d_type = DT_CHR;
				break;

			case VDIR:
				d.d_type = DT_DIR;
				break;

			case VFIFO:
				d.d_type = DT_FIFO;
				break;

			case VLNK:
				d.d_type = DT_LNK;
				break;

			case VREG:
				d.d_type = DT_REG;
				break;

			case VSOCK:
				d.d_type = DT_SOCK;
				break;

			default:
				panic("tmpfs_dir_getdents: type %p %d",
				    de->td_node, (int)de->td_node->tn_type);
			}
		}
		d.d_namlen = de->td_namelen;
		MPASS(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
		d.d_reclen = GENERIC_DIRSIZ(&d);
		dirent_terminate(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (d.d_reclen > uio->uio_resid) {
			error = EJUSTRETURN;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);
		if (error == 0) {
			de = tmpfs_dir_next(node, &dc);
			if (cookies != NULL) {
				off = tmpfs_dirent_cookie(de);
				MPASS(*ncookies < maxcookies);
				cookies[(*ncookies)++] = off;
			}
		}
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Skip setting off when using cookies as it is already done above. */
	if (cookies == NULL)
		off = tmpfs_dirent_cookie(de);

	/* Update the offset and cache. */
	uio->uio_offset = off;
	node->tn_dir.tn_readdir_lastn = off;
	node->tn_dir.tn_readdir_lastp = de;

	tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED);
	return error;
}
|
|
|
|
|
2010-08-22 05:36:06 +00:00
|
|
|
int
|
|
|
|
tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
|
|
|
|
{
|
|
|
|
struct tmpfs_dirent *de;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
|
|
|
|
cnp->cn_nameptr, cnp->cn_namelen, &de);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
tmpfs_dir_attach(dvp, de);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
|
|
|
|
{
|
|
|
|
struct tmpfs_dirent *de;
|
|
|
|
|
|
|
|
de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
|
|
|
|
MPASS(de != NULL && de->td_node == NULL);
|
|
|
|
tmpfs_dir_detach(dvp, de);
|
2013-01-06 22:15:44 +00:00
|
|
|
tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
|
2010-08-22 05:36:06 +00:00
|
|
|
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
 * Resizes the aobj associated with the regular file pointed to by 'vp' to the
 * size 'newsize'. 'vp' must point to a vnode that represents a regular file.
 * 'newsize' must be positive.
 *
 * If 'ignerr' is true, a page-in failure while zeroing the truncated tail
 * is ignored rather than aborting the resize with EIO.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_page_t m;
	vm_pindex_t idx, newpages, oldpages;
	off_t oldsize;
	int base, rv;

	MPASS(vp->v_type == VREG);
	MPASS(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	uobj = node->tn_reg.tn_aobj;
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/*
	 * Convert the old and new sizes to the number of pages needed to
	 * store them. It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own.
	 */
	oldsize = node->tn_size;
	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
	MPASS(oldpages == uobj->size);
	newpages = OFF_TO_IDX(newsize + PAGE_MASK);

	/* Fast path: growing (or unchanged) within the last allocated page. */
	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
		node->tn_size = newsize;
		return (0);
	}

	/* Refuse a grow that exceeds the mount's page budget. */
	if (newpages > oldpages &&
	    tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0)
		return (ENOSPC);

	VM_OBJECT_WLOCK(uobj);
	if (newsize < oldsize) {
		/*
		 * Zero the truncated part of the last page.
		 */
		base = newsize & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(newsize);
retry:
			/* NOCREAT: only grab the page if it is resident. */
			m = vm_page_grab(uobj, idx, VM_ALLOC_NOCREAT);
			if (m != NULL) {
				MPASS(vm_page_all_valid(m));
			} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
				/*
				 * The data lives on swap: allocate a frame
				 * and page it back in so its tail can be
				 * zeroed.  On allocation failure (WAITFAIL
				 * returns NULL after sleeping) restart the
				 * lookup, since the page may have become
				 * resident meanwhile.
				 */
				m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL |
				    VM_ALLOC_WAITFAIL);
				if (m == NULL)
					goto retry;
				rv = vm_pager_get_pages(uobj, &m, 1, NULL,
				    NULL);
				if (rv == VM_PAGER_OK) {
					/*
					 * Since the page was not resident,
					 * and therefore not recently
					 * accessed, immediately enqueue it
					 * for asynchronous laundering. The
					 * current operation is not regarded
					 * as an access.
					 */
					vm_page_lock(m);
					vm_page_launder(m);
					vm_page_unlock(m);
				} else {
					/*
					 * Page-in failed: either skip the
					 * zeroing (ignerr) or abort the
					 * whole resize with EIO.
					 */
					vm_page_free(m);
					if (ignerr)
						m = NULL;
					else {
						VM_OBJECT_WUNLOCK(uobj);
						return (EIO);
					}
				}
			}
			if (m != NULL) {
				/* Zero from the new EOF to the page end. */
				pmap_zero_page_area(m, base, PAGE_SIZE - base);
				vm_page_set_dirty(m);
				vm_page_xunbusy(m);
			}
		}

		/*
		 * Release any swap space and free any whole pages.
		 */
		if (newpages < oldpages) {
			swap_pager_freespace(uobj, newpages, oldpages -
			    newpages);
			vm_object_page_remove(uobj, newpages, 0, 0);
		}
	}
	uobj->size = newpages;
	VM_OBJECT_WUNLOCK(uobj);

	/* Account the delta against the mount; may be negative on shrink. */
	atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);

	node->tn_size = newsize;
	return (0);
}
|
|
|
|
|
2015-01-28 10:37:23 +00:00
|
|
|
/*
 * Propagate dirtiness of the backing VM object to the tmpfs node: if the
 * object's generation advanced past the last recorded clean generation
 * (presumably due to writes through a mapping -- confirm against the VM
 * object generation semantics), mark the node MODIFIED and CHANGED so its
 * timestamps get updated.
 */
void
tmpfs_check_mtime(struct vnode *vp)
{
	struct tmpfs_node *node;
	struct vm_object *obj;

	ASSERT_VOP_ELOCKED(vp, "check_mtime");
	/* Only regular files carry a tmpfs data object to inspect. */
	if (vp->v_type != VREG)
		return;
	obj = vp->v_object;
	KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
	    (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));
	/* unlocked read */
	if (obj->generation != obj->cleangeneration) {
		VM_OBJECT_WLOCK(obj);
		/* Re-check under the object lock before consuming it. */
		if (obj->generation != obj->cleangeneration) {
			obj->cleangeneration = obj->generation;
			node = VP_TO_TMPFS_NODE(vp);
			node->tn_status |= TMPFS_NODE_MODIFIED |
			    TMPFS_NODE_CHANGED;
		}
		VM_OBJECT_WUNLOCK(obj);
	}
}
|
|
|
|
|
2007-06-16 01:56:05 +00:00
|
|
|
/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, or EOPNOTSUPP / EROFS / EPERM / an access-check
 * error on failure.
 */
int
tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chflags");

	node = VP_TO_TMPFS_NODE(vp);

	/* Reject any flag bit outside the set tmpfs knows how to store. */
	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
	    UF_SPARSE | UF_SYSTEM)) != 0)
		return (EOPNOTSUPP);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/*
	 * Callers may only modify the file flags on objects they
	 * have VADMIN rights for.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);
	/*
	 * Unprivileged processes are not permitted to unset system
	 * flags, or modify flags if any system flags are set.
	 */
	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
		/*
		 * Privileged (priv_check_cred returned 0): changing flags
		 * while system flags are set additionally requires
		 * securelevel <= 0.
		 */
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
			error = securelevel_gt(cred, 0);
			if (error)
				return (error);
		}
	} else {
		/*
		 * Unprivileged: no system flags may already be set, and
		 * the SF_* bits must remain untouched.
		 */
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
		    ((flags ^ node->tn_flags) & SF_SETTABLE))
			return (EPERM);
	}
	node->tn_flags = flags;
	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chflags2");

	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, or EROFS / EPERM / EFTYPE / an access- or
 * privilege-check error on failure.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chmod");

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * To modify the permissions on a file, must possess VADMIN
	 * for that file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of.
	 */
	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
			return (EFTYPE);
	}
	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID);
		if (error)
			return (error);
	}

	/* Replace the permission bits only; preserve the file type bits. */
	node->tn_mode &= ~ALLPERMS;
	node->tn_mode |= mode & ALLPERMS;

	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chmod2");

	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Change ownership of the given vnode. At least one of uid or gid must
 * be different than VNOVAL. If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, or EROFS / EPERM / an access- or privilege-check
 * error on failure.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;
	uid_t ouid;
	gid_t ogid;

	ASSERT_VOP_ELOCKED(vp, "chown");

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	MPASS(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	MPASS(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);

	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if ((uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
		return (error);

	ogid = node->tn_gid;
	ouid = node->tn_uid;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;

	/*
	 * On an actual ownership change by a caller without the
	 * retain-set-id privilege, strip the setuid/setgid bits.
	 */
	if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
			node->tn_mode &= ~(S_ISUID | S_ISGID);
	}

	ASSERT_VOP_ELOCKED(vp, "chown2");

	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, EISDIR for directories, EROFS / EPERM /
 * EOPNOTSUPP as appropriate, or the error from tmpfs_truncate().
 *
 * NOTE(review): 'cred' and 'p' are accepted for interface symmetry with
 * the other tmpfs_ch*() helpers but are not used in this body.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chsize");

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if in the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as will update tn_status; no need to do that here. */

	ASSERT_VOP_ELOCKED(vp, "chsize2");

	return (error);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change access and modification times of the given vnode.
|
|
|
|
* Caller should execute tmpfs_update on vp after a successful execution.
|
|
|
|
* The vnode must be locked on entry and remain locked on exit.
|
|
|
|
*/
|
|
|
|
int
|
2014-06-17 07:11:00 +00:00
|
|
|
tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
|
|
|
|
struct ucred *cred, struct thread *l)
|
2007-06-16 01:56:05 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
2017-01-06 17:32:44 +00:00
|
|
|
ASSERT_VOP_ELOCKED(vp, "chtimes");
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
node = VP_TO_TMPFS_NODE(vp);
|
|
|
|
|
|
|
|
/* Disallow this operation if the file system is mounted read-only. */
|
|
|
|
if (vp->v_mount->mnt_flag & MNT_RDONLY)
|
|
|
|
return EROFS;
|
|
|
|
|
|
|
|
/* Immutable or append-only files cannot be modified, either. */
|
|
|
|
if (node->tn_flags & (IMMUTABLE | APPEND))
|
|
|
|
return EPERM;
|
|
|
|
|
2014-06-17 07:11:00 +00:00
|
|
|
error = vn_utimes_perm(vp, vap, cred, l);
|
|
|
|
if (error != 0)
|
2007-06-29 05:23:15 +00:00
|
|
|
return (error);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2015-07-26 08:33:46 +00:00
|
|
|
if (vap->va_atime.tv_sec != VNOVAL)
|
2007-06-16 01:56:05 +00:00
|
|
|
node->tn_status |= TMPFS_NODE_ACCESSED;
|
|
|
|
|
2015-07-26 08:33:46 +00:00
|
|
|
if (vap->va_mtime.tv_sec != VNOVAL)
|
2007-06-16 01:56:05 +00:00
|
|
|
node->tn_status |= TMPFS_NODE_MODIFIED;
|
|
|
|
|
2015-07-26 08:33:46 +00:00
|
|
|
if (vap->va_birthtime.tv_sec != VNOVAL)
|
2007-06-16 01:56:05 +00:00
|
|
|
node->tn_status |= TMPFS_NODE_MODIFIED;
|
2007-06-28 02:39:31 +00:00
|
|
|
|
2014-06-17 07:11:00 +00:00
|
|
|
tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
|
2007-06-28 02:39:31 +00:00
|
|
|
|
2015-07-26 08:33:46 +00:00
|
|
|
if (vap->va_birthtime.tv_sec != VNOVAL)
|
2014-06-17 07:11:00 +00:00
|
|
|
node->tn_birthtime = vap->va_birthtime;
|
2017-01-06 17:32:44 +00:00
|
|
|
ASSERT_VOP_ELOCKED(vp, "chtimes2");
|
2007-06-16 01:56:05 +00:00
|
|
|
|
2017-01-06 17:32:44 +00:00
|
|
|
return (0);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2019-04-02 13:49:32 +00:00
|
|
|
tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
|
2017-01-06 17:43:36 +00:00
|
|
|
{
|
|
|
|
|
2019-04-02 13:49:32 +00:00
|
|
|
if ((node->tn_status & status) == status || tm->tm_ronly)
|
2017-01-06 17:43:36 +00:00
|
|
|
return;
|
|
|
|
TMPFS_NODE_LOCK(node);
|
|
|
|
node->tn_status |= status;
|
|
|
|
TMPFS_NODE_UNLOCK(node);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sync timestamps */
|
2017-01-06 19:58:20 +00:00
|
|
|
void
|
|
|
|
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
|
2007-06-16 01:56:05 +00:00
|
|
|
const struct timespec *mod)
|
|
|
|
{
|
2017-01-06 19:58:20 +00:00
|
|
|
struct tmpfs_node *node;
|
2007-06-16 01:56:05 +00:00
|
|
|
struct timespec now;
|
|
|
|
|
2017-01-06 19:58:20 +00:00
|
|
|
ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
|
|
|
|
node = VP_TO_TMPFS_NODE(vp);
|
2007-06-16 01:56:05 +00:00
|
|
|
|
|
|
|
if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
|
|
|
|
TMPFS_NODE_CHANGED)) == 0)
|
|
|
|
return;
|
|
|
|
|
2007-06-18 14:40:19 +00:00
|
|
|
vfs_timestamp(&now);
|
2017-01-06 19:58:20 +00:00
|
|
|
TMPFS_NODE_LOCK(node);
|
2007-06-16 01:56:05 +00:00
|
|
|
if (node->tn_status & TMPFS_NODE_ACCESSED) {
|
|
|
|
if (acc == NULL)
|
|
|
|
acc = &now;
|
|
|
|
node->tn_atime = *acc;
|
|
|
|
}
|
|
|
|
if (node->tn_status & TMPFS_NODE_MODIFIED) {
|
|
|
|
if (mod == NULL)
|
|
|
|
mod = &now;
|
|
|
|
node->tn_mtime = *mod;
|
|
|
|
}
|
2017-01-06 17:43:36 +00:00
|
|
|
if (node->tn_status & TMPFS_NODE_CHANGED)
|
2007-06-16 01:56:05 +00:00
|
|
|
node->tn_ctime = now;
|
2017-01-06 17:43:36 +00:00
|
|
|
node->tn_status &= ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
|
|
|
|
TMPFS_NODE_CHANGED);
|
|
|
|
TMPFS_NODE_UNLOCK(node);
|
|
|
|
|
Huge cleanup of random(4) code.
* GENERAL
- Update copyright.
- Make kernel options for RANDOM_YARROW and RANDOM_DUMMY. Set
neither to ON, which means we want Fortuna
- If there is no 'device random' in the kernel, there will be NO
random(4) device in the kernel, and the KERN_ARND sysctl will
return nothing. With RANDOM_DUMMY there will be a random(4) that
always blocks.
- Repair kern.arandom (KERN_ARND sysctl). The old version went
through arc4random(9) and was a bit weird.
- Adjust arc4random stirring a bit - the existing code looks a little
suspect.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Redo read_random(9) so as to duplicate random(4)'s read internals.
This makes it a first-class citizen rather than a hack.
- Move stuff out of locked regions when it does not need to be
there.
- Trim RANDOM_DEBUG printfs. Some are excess to requirement, some
behind boot verbose.
- Use SYSINIT to sequence the startup.
- Fix init/deinit sysctl stuff.
- Make relevant sysctls also tunables.
- Add different harvesting "styles" to allow for different requirements
(direct, queue, fast).
- Add harvesting of FFS atime events. This needs to be checked for
weighing down the FS code.
- Add harvesting of slab allocator events. This needs to be checked for
weighing down the allocator code.
- Fix the random(9) manpage.
- Loadable modules are not present for now. These will be re-engineered
when the dust settles.
- Use macros for locks.
- Fix comments.
* src/share/man/...
- Update the man pages.
* src/etc/...
- The startup/shutdown work is done in D2924.
* src/UPDATING
- Add UPDATING announcement.
* src/sys/dev/random/build.sh
- Add copyright.
- Add libz for unit tests.
* src/sys/dev/random/dummy.c
- Remove; no longer needed. Functionality incorporated into randomdev.*.
* live_entropy_sources.c live_entropy_sources.h
- Remove; content moved.
- move content to randomdev.[ch] and optimise.
* src/sys/dev/random/random_adaptors.c src/sys/dev/random/random_adaptors.h
- Remove; plugability is no longer used. Compile-time algorithm
selection is the way to go.
* src/sys/dev/random/random_harvestq.c src/sys/dev/random/random_harvestq.h
- Add early (re)boot-time randomness caching.
* src/sys/dev/random/randomdev_soft.c src/sys/dev/random/randomdev_soft.h
- Remove; no longer needed.
* src/sys/dev/random/uint128.h
- Provide a fake uint128_t; if a real one ever arrived, we can use
that instead. All that is needed here is N=0, N++, N==0, and some
localised trickery is used to manufacture a 128-bit 0ULLL.
* src/sys/dev/random/unit_test.c src/sys/dev/random/unit_test.h
- Improve unit tests; previously the testing human needed clairvoyance;
now the test will do a basic check of compressibility. Clairvoyant
talent is still a good idea.
- This is still a long way off a proper unit test.
* src/sys/dev/random/fortuna.c src/sys/dev/random/fortuna.h
- Improve messy union to just uint128_t.
- Remove unneeded 'static struct fortuna_start_cache'.
- Tighten up up arithmetic.
- Provide a method to allow eternal junk to be introduced; harden
it against blatant by compress/hashing.
- Assert that locks are held correctly.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Turn into self-sufficient module (no longer requires randomdev_soft.[ch])
* src/sys/dev/random/yarrow.c src/sys/dev/random/yarrow.h
- Improve messy union to just uint128_t.
- Remove unneeded 'staic struct start_cache'.
- Tighten up up arithmetic.
- Provide a method to allow eternal junk to be introduced; harden
it against blatant by compress/hashing.
- Assert that locks are held correctly.
- Fix the nasty pre- and post-read overloading by providing explictit
functions to do these tasks.
- Turn into self-sufficient module (no longer requires randomdev_soft.[ch])
- Fix some magic numbers elsewhere used as FAST and SLOW.
Differential Revision: https://reviews.freebsd.org/D2025
Reviewed by: vsevolod,delphij,rwatson,trasz,jmg
Approved by: so (delphij)
2015-06-30 17:00:45 +00:00
|
|
|
/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
|
2018-08-26 12:51:46 +00:00
|
|
|
random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Flush any pending timestamp updates on the given vnode, stamping
 * them with the current time (NULL acc/mod arguments to tmpfs_itimes).
 * The vnode must be locked.
 */
void
tmpfs_update(struct vnode *vp)
{

	tmpfs_itimes(vp, NULL, NULL);
}
|
|
|
|
|
|
|
|
int
|
|
|
|
tmpfs_truncate(struct vnode *vp, off_t length)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct tmpfs_node *node;
|
|
|
|
|
|
|
|
node = VP_TO_TMPFS_NODE(vp);
|
|
|
|
|
|
|
|
if (length < 0) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (node->tn_size == length) {
|
|
|
|
error = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
|
|
|
|
return (EFBIG);
|
|
|
|
|
2012-01-16 00:26:49 +00:00
|
|
|
error = tmpfs_reg_resize(vp, length, FALSE);
|
2017-01-06 17:43:36 +00:00
|
|
|
if (error == 0)
|
2007-06-16 01:56:05 +00:00
|
|
|
node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
|
|
|
|
|
|
|
|
out:
|
|
|
|
tmpfs_update(vp);
|
|
|
|
|
2017-01-06 17:43:36 +00:00
|
|
|
return (error);
|
2007-06-16 01:56:05 +00:00
|
|
|
}
|
2013-01-06 22:15:44 +00:00
|
|
|
|
|
|
|
static __inline int
|
|
|
|
tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
|
|
|
|
{
|
|
|
|
if (a->td_hash > b->td_hash)
|
|
|
|
return (1);
|
|
|
|
else if (a->td_hash < b->td_hash)
|
|
|
|
return (-1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
|