2005-01-06 23:35:40 +00:00
|
|
|
/*-
|
2017-11-20 19:43:44 +00:00
|
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
*
|
1994-05-24 10:09:53 +00:00
|
|
|
* Copyright (c) 1982, 1986, 1989, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
* (c) UNIX System Laboratories, Inc.
|
|
|
|
* All or some portions of this file are derived from material licensed
|
|
|
|
* to the University of California by American Telephone and Telegraph
|
|
|
|
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
|
|
|
|
* the permission of UNIX System Laboratories, Inc.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2016-09-15 13:16:20 +00:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-24 10:09:53 +00:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94
|
|
|
|
*/
|
|
|
|
|
2003-06-11 00:56:59 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2011-08-11 12:30:23 +00:00
|
|
|
#include "opt_capsicum.h"
|
1996-01-03 21:42:35 +00:00
|
|
|
#include "opt_ktrace.h"
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
1994-08-18 22:36:09 +00:00
|
|
|
#include <sys/systm.h>
|
2000-12-06 07:09:08 +00:00
|
|
|
#include <sys/kernel.h>
|
2014-03-16 10:55:57 +00:00
|
|
|
#include <sys/capsicum.h>
|
2008-03-31 12:01:21 +00:00
|
|
|
#include <sys/fcntl.h>
|
2009-05-27 14:11:23 +00:00
|
|
|
#include <sys/jail.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/lock.h>
|
2002-01-13 21:37:49 +00:00
|
|
|
#include <sys/mutex.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/namei.h>
|
|
|
|
#include <sys/vnode.h>
|
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <sys/filedesc.h>
|
|
|
|
#include <sys/proc.h>
|
2009-04-06 10:32:40 +00:00
|
|
|
#include <sys/sdt.h>
|
2005-02-07 18:44:55 +00:00
|
|
|
#include <sys/syscallsubr.h>
|
2005-04-03 23:50:20 +00:00
|
|
|
#include <sys/sysctl.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
#include <sys/ktrace.h>
|
|
|
|
#endif
|
|
|
|
|
2006-02-05 15:42:01 +00:00
|
|
|
#include <security/audit/audit.h>
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
2006-02-05 15:42:01 +00:00
|
|
|
|
2002-03-20 04:09:59 +00:00
|
|
|
#include <vm/uma.h>
|
1997-12-27 02:56:39 +00:00
|
|
|
|
2006-02-01 09:34:32 +00:00
|
|
|
#define NAMEI_DIAGNOSTIC 1
|
2004-12-03 12:15:39 +00:00
|
|
|
#undef NAMEI_DIAGNOSTIC
|
|
|
|
|
2009-04-06 10:32:40 +00:00
|
|
|
SDT_PROVIDER_DECLARE(vfs);
|
2013-11-26 08:46:27 +00:00
|
|
|
SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *",
|
2009-04-06 10:32:40 +00:00
|
|
|
"unsigned long");
|
2013-11-26 08:46:27 +00:00
|
|
|
SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *");
|
2009-04-06 10:32:40 +00:00
|
|
|
|
2016-11-02 12:02:31 +00:00
|
|
|
/* Allocation zone for namei. */
|
2002-03-20 04:09:59 +00:00
|
|
|
uma_zone_t namei_zone;
|
2016-11-02 12:02:31 +00:00
|
|
|
|
|
|
|
/* Placeholder vnode for mp traversal. */
|
Below is slightly edited description of the LOR by Tor Egge:
--------------------------
[Deadlock] is caused by a lock order reversal in vfs_lookup(), where
[some] process is trying to lock a directory vnode (that is the parent
directory of covered vnode) while holding an exclusive vnode lock on
covering vnode.
A simplified scenario:
root fs var fs
/ A / (/var) D
/var B /log (/var/log) E
vfs lock C vfs lock F
Within each file system, the lock order is clear: C->A->B and F->D->E
When traversing across mounts, the system can choose between two lock orders,
but everything must then follow that lock order:
L1: C->A->B
|
+->F->D->E
L2: F->D->E
|
+->C->A->B
The lookup() process for namei("/var") mixes those two lock orders:
VOP_LOOKUP() obtains B while A is held
vfs_busy() obtains a shared lock on F while A and B are held (follows L1,
violates L2)
vput() releases lock on B
VOP_UNLOCK() releases lock on A
VFS_ROOT() obtains lock on D while shared lock on F is held
vfs_unbusy() releases shared lock on F
vn_lock() obtains lock on A while D is held (violates L1, follows L2)
dounmount() follows L1 (B is locked while F is drained).
Without unmount activity, vfs_busy() will always succeed without blocking
and the deadlock isn't triggered (the system behaves as if L2 is followed).
With unmount, you can get 4 processes in a deadlock:
p1: holds D, want A (in lookup())
p2: holds shared lock on F, want D (in VFS_ROOT())
p3: holds B, want drain lock on F (in dounmount())
p4: holds A, want B (in VOP_LOOKUP())
You can have more than one instance of p2.
The reversal was introduced in revision 1.81 of src/sys/kern/vfs_lookup.c and
MFCed to revision 1.80.2.1, probably to avoid a cascade of vnode locks when nfs
servers are dead (VFS_ROOT() just hangs) spreading to the root fs root vnode.
- Tor Egge
To fix the LOR, ups@ noted that when crossing the mount point, ni_dvp
is actually not used by the callers of namei. Thus, placeholder deadfs
vnode vp_crossmp is introduced that is filled into ni_dvp.
Idea by: ups
Reviewed by: tegge, ups, jeff, rwatson (mac interaction)
Tested by: Peter Holm
MFC after: 2 weeks
2007-01-22 11:25:22 +00:00
|
|
|
static struct vnode *vp_crossmp;
|
2000-12-06 07:09:08 +00:00
|
|
|
|
2016-12-02 18:03:15 +00:00
|
|
|
static int
|
|
|
|
crossmp_vop_islocked(struct vop_islocked_args *ap)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (LK_SHARED);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
crossmp_vop_lock1(struct vop_lock1_args *ap)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
2018-05-19 04:59:39 +00:00
|
|
|
struct lock *lk __unused;
|
|
|
|
const char *file __unused;
|
|
|
|
int flags, line __unused;
|
2016-12-02 18:03:15 +00:00
|
|
|
|
|
|
|
vp = ap->a_vp;
|
|
|
|
lk = vp->v_vnlock;
|
|
|
|
flags = ap->a_flags;
|
|
|
|
file = ap->a_file;
|
|
|
|
line = ap->a_line;
|
|
|
|
|
|
|
|
if ((flags & LK_SHARED) == 0)
|
|
|
|
panic("invalid lock request for crossmp");
|
|
|
|
|
|
|
|
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line,
|
|
|
|
flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL);
|
|
|
|
WITNESS_LOCK(&lk->lock_object, 0, file, line);
|
|
|
|
if ((flags & LK_INTERLOCK) != 0)
|
|
|
|
VI_UNLOCK(vp);
|
|
|
|
LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
crossmp_vop_unlock(struct vop_unlock_args *ap)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
2018-05-19 04:59:39 +00:00
|
|
|
struct lock *lk __unused;
|
2016-12-02 18:03:15 +00:00
|
|
|
|
|
|
|
vp = ap->a_vp;
|
|
|
|
lk = vp->v_vnlock;
|
|
|
|
|
|
|
|
WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE);
|
|
|
|
LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE,
|
|
|
|
LOCK_LINE);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct vop_vector crossmp_vnodeops = {
|
2017-01-22 19:36:02 +00:00
|
|
|
.vop_default = &default_vnodeops,
|
2016-12-02 18:03:15 +00:00
|
|
|
.vop_islocked = crossmp_vop_islocked,
|
|
|
|
.vop_lock1 = crossmp_vop_lock1,
|
|
|
|
.vop_unlock = crossmp_vop_unlock,
|
|
|
|
};
|
2019-12-16 00:06:22 +00:00
|
|
|
/*
|
|
|
|
* VFS_VOP_VECTOR_REGISTER(crossmp_vnodeops) is not used here since the vnode
|
|
|
|
* gets allocated early. See nameiinit for the direct call below.
|
|
|
|
*/
|
2016-12-02 18:03:15 +00:00
|
|
|
|
2016-11-02 12:43:15 +00:00
|
|
|
struct nameicap_tracker {
|
|
|
|
struct vnode *dp;
|
|
|
|
TAILQ_ENTRY(nameicap_tracker) nm_link;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Zone for cap mode tracker elements used for dotdot capability checks. */
|
|
|
|
static uma_zone_t nt_zone;
|
|
|
|
|
2000-12-06 07:09:08 +00:00
|
|
|
static void
|
|
|
|
nameiinit(void *dummy __unused)
|
|
|
|
{
|
2007-03-31 16:08:50 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
|
|
|
|
UMA_ALIGN_PTR, 0);
|
2016-11-02 12:43:15 +00:00
|
|
|
nt_zone = uma_zcreate("rentr", sizeof(struct nameicap_tracker),
|
2017-03-15 18:23:32 +00:00
|
|
|
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
2019-12-16 00:06:22 +00:00
|
|
|
vfs_vector_op_register(&crossmp_vnodeops);
|
2016-12-02 18:03:15 +00:00
|
|
|
getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp);
|
2000-12-06 07:09:08 +00:00
|
|
|
}
|
2008-03-16 10:58:09 +00:00
|
|
|
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
|
2000-12-06 07:09:08 +00:00
|
|
|
|
2016-12-12 11:12:04 +00:00
|
|
|
static int lookup_cap_dotdot = 1;
|
2016-11-02 12:43:15 +00:00
|
|
|
SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN,
|
|
|
|
&lookup_cap_dotdot, 0,
|
|
|
|
"enables \"..\" components in path lookup in capability mode");
|
2016-12-12 11:12:04 +00:00
|
|
|
static int lookup_cap_dotdot_nonlocal = 1;
|
2016-11-02 12:43:15 +00:00
|
|
|
SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN,
|
|
|
|
&lookup_cap_dotdot_nonlocal, 0,
|
|
|
|
"enables \"..\" components in path lookup in capability mode "
|
|
|
|
"on non-local mount");
|
|
|
|
|
|
|
|
static void
|
|
|
|
nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
|
|
|
|
{
|
|
|
|
struct nameicap_tracker *nt;
|
|
|
|
|
|
|
|
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR)
|
|
|
|
return;
|
2018-11-11 00:04:36 +00:00
|
|
|
if ((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | NI_LCF_BENEATH_LATCHED)) ==
|
|
|
|
NI_LCF_BENEATH_ABS) {
|
|
|
|
MPASS((ndp->ni_lcf & NI_LCF_LATCH) != 0);
|
|
|
|
if (dp != ndp->ni_beneath_latch)
|
|
|
|
return;
|
|
|
|
ndp->ni_lcf |= NI_LCF_BENEATH_LATCHED;
|
|
|
|
}
|
2016-11-02 12:43:15 +00:00
|
|
|
nt = uma_zalloc(nt_zone, M_WAITOK);
|
|
|
|
vhold(dp);
|
|
|
|
nt->dp = dp;
|
|
|
|
TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-11-11 00:04:36 +00:00
|
|
|
nameicap_cleanup(struct nameidata *ndp, bool clean_latch)
|
2016-11-02 12:43:15 +00:00
|
|
|
{
|
|
|
|
struct nameicap_tracker *nt, *nt1;
|
|
|
|
|
|
|
|
KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
|
|
|
|
(ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
|
|
|
|
TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
|
|
|
|
TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
|
|
|
|
vdrop(nt->dp);
|
|
|
|
uma_zfree(nt_zone, nt);
|
|
|
|
}
|
2018-11-29 19:13:10 +00:00
|
|
|
if (clean_latch && (ndp->ni_lcf & NI_LCF_LATCH) != 0) {
|
|
|
|
ndp->ni_lcf &= ~NI_LCF_LATCH;
|
2018-11-11 00:04:36 +00:00
|
|
|
vrele(ndp->ni_beneath_latch);
|
2018-11-29 19:13:10 +00:00
|
|
|
}
|
2016-11-02 12:43:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For dotdot lookups in capability mode, only allow the component
|
|
|
|
* lookup to succeed if the resulting directory was already traversed
|
|
|
|
* during the operation. Also fail dotdot lookups for non-local
|
|
|
|
* filesystems, where external agents might assist local lookups to
|
|
|
|
* escape the compartment.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
|
|
|
|
{
|
|
|
|
struct nameicap_tracker *nt;
|
|
|
|
struct mount *mp;
|
|
|
|
|
|
|
|
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp == NULL ||
|
|
|
|
dp->v_type != VDIR)
|
|
|
|
return (0);
|
|
|
|
mp = dp->v_mount;
|
|
|
|
if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
|
|
|
|
(mp->mnt_flag & MNT_LOCAL) == 0)
|
|
|
|
return (ENOTCAPABLE);
|
|
|
|
TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
|
|
|
|
nm_link) {
|
|
|
|
if (dp == nt->dp)
|
|
|
|
return (0);
|
|
|
|
}
|
2018-11-11 00:04:36 +00:00
|
|
|
if ((ndp->ni_lcf & NI_LCF_BENEATH_ABS) != 0) {
|
|
|
|
ndp->ni_lcf &= ~NI_LCF_BENEATH_LATCHED;
|
|
|
|
nameicap_cleanup(ndp, false);
|
|
|
|
return (0);
|
|
|
|
}
|
2016-11-02 12:43:15 +00:00
|
|
|
return (ENOTCAPABLE);
|
|
|
|
}
|
|
|
|
|
2015-07-05 22:56:41 +00:00
|
|
|
static void
|
|
|
|
namei_cleanup_cnp(struct componentname *cnp)
|
|
|
|
{
|
2016-11-02 12:02:31 +00:00
|
|
|
|
2015-07-05 22:56:41 +00:00
|
|
|
uma_zfree(namei_zone, cnp->cn_pnbuf);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
cnp->cn_pnbuf = NULL;
|
|
|
|
cnp->cn_nameptr = NULL;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2015-07-09 15:06:58 +00:00
|
|
|
static int
|
2020-02-01 06:40:35 +00:00
|
|
|
namei_handle_root(struct nameidata *ndp, struct vnode **dpp, u_int n)
|
2015-07-09 15:06:58 +00:00
|
|
|
{
|
2015-07-09 17:17:26 +00:00
|
|
|
struct componentname *cnp;
|
2015-07-09 15:06:58 +00:00
|
|
|
|
2015-07-09 17:17:26 +00:00
|
|
|
cnp = &ndp->ni_cnd;
|
2018-11-11 00:04:36 +00:00
|
|
|
if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) {
|
2015-07-09 15:06:58 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(curthread, KTR_CAPFAIL))
|
|
|
|
ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
|
|
|
|
#endif
|
|
|
|
return (ENOTCAPABLE);
|
|
|
|
}
|
2018-11-11 00:04:36 +00:00
|
|
|
if ((cnp->cn_flags & BENEATH) != 0) {
|
|
|
|
ndp->ni_lcf |= NI_LCF_BENEATH_ABS;
|
|
|
|
ndp->ni_lcf &= ~NI_LCF_BENEATH_LATCHED;
|
|
|
|
nameicap_cleanup(ndp, false);
|
|
|
|
}
|
2015-07-09 15:06:58 +00:00
|
|
|
while (*(cnp->cn_nameptr) == '/') {
|
|
|
|
cnp->cn_nameptr++;
|
|
|
|
ndp->ni_pathlen--;
|
|
|
|
}
|
|
|
|
*dpp = ndp->ni_rootdir;
|
2020-02-01 06:40:35 +00:00
|
|
|
vrefactn(*dpp, n);
|
2015-07-09 15:06:58 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2006-08-05 21:08:47 +00:00
|
|
|
* Convert a pathname into a pointer to a locked vnode.
|
1994-05-24 10:09:53 +00:00
|
|
|
*
|
|
|
|
* The FOLLOW flag is set when symbolic links are to be followed
|
|
|
|
* when they occur at the end of the name translation process.
|
|
|
|
* Symbolic links are always followed for all other pathname
|
|
|
|
* components other than the last.
|
|
|
|
*
|
|
|
|
* The segflg defines whether the name is to be copied from user
|
|
|
|
* space or kernel space.
|
|
|
|
*
|
|
|
|
* Overall outline of namei:
|
|
|
|
*
|
|
|
|
* copy in name
|
|
|
|
* get starting directory
|
|
|
|
* while (!done && !error) {
|
|
|
|
* call lookup to search path.
|
|
|
|
* if symbolic link, massage name in buffer and continue
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
int
|
2006-08-05 21:40:59 +00:00
|
|
|
namei(struct nameidata *ndp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2006-08-05 21:40:59 +00:00
|
|
|
struct filedesc *fdp; /* pointer to file descriptor state */
|
|
|
|
char *cp; /* pointer into pathname argument */
|
|
|
|
struct vnode *dp; /* the directory we are searching */
|
1994-05-24 10:09:53 +00:00
|
|
|
struct iovec aiov; /* uio for reading symbolic links */
|
2016-11-02 12:02:31 +00:00
|
|
|
struct componentname *cnp;
|
2020-02-02 16:34:57 +00:00
|
|
|
struct file *dfp;
|
2016-11-02 12:02:31 +00:00
|
|
|
struct thread *td;
|
|
|
|
struct proc *p;
|
|
|
|
cap_rights_t rights;
|
2018-11-11 00:04:36 +00:00
|
|
|
struct filecaps dirfd_caps;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct uio auio;
|
2015-07-09 16:32:58 +00:00
|
|
|
int error, linklen, startdir_used;
|
2004-08-04 18:39:07 +00:00
|
|
|
|
2016-11-02 12:02:31 +00:00
|
|
|
cnp = &ndp->ni_cnd;
|
|
|
|
td = cnp->cn_thread;
|
|
|
|
p = td->td_proc;
|
2002-02-27 19:15:29 +00:00
|
|
|
ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
|
2001-09-12 08:38:13 +00:00
|
|
|
KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
|
1999-01-08 17:31:30 +00:00
|
|
|
KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
|
1999-01-10 01:58:29 +00:00
|
|
|
("namei: nameiop contaminated with flags"));
|
1999-01-08 17:31:30 +00:00
|
|
|
KASSERT((cnp->cn_flags & OPMASK) == 0,
|
1999-01-10 01:58:29 +00:00
|
|
|
("namei: flags contaminated with nameiops"));
|
2015-07-09 16:32:58 +00:00
|
|
|
MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
|
|
|
|
ndp->ni_startdir->v_type == VBAD);
|
2001-09-12 08:38:13 +00:00
|
|
|
fdp = p->p_fd;
|
2016-11-02 12:43:15 +00:00
|
|
|
TAILQ_INIT(&ndp->ni_cap_tracker);
|
|
|
|
ndp->ni_lcf = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-05-29 10:02:44 +00:00
|
|
|
/* We will set this ourselves if we need it. */
|
|
|
|
cnp->cn_flags &= ~TRAILINGSLASH;
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Get a buffer for the name to be translated, and copy the
|
|
|
|
* name into the buffer.
|
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & HASBUF) == 0)
|
2003-02-19 05:47:46 +00:00
|
|
|
cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (ndp->ni_segflg == UIO_SYSSPACE)
|
2016-11-02 12:10:39 +00:00
|
|
|
error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
|
|
|
|
&ndp->ni_pathlen);
|
1994-05-24 10:09:53 +00:00
|
|
|
else
|
2016-11-02 12:10:39 +00:00
|
|
|
error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
|
|
|
|
&ndp->ni_pathlen);
|
1996-12-01 16:05:44 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't allow empty pathnames.
|
|
|
|
*/
|
2015-07-09 17:17:26 +00:00
|
|
|
if (error == 0 && *cnp->cn_pnbuf == '\0')
|
1996-12-01 16:05:44 +00:00
|
|
|
error = ENOENT;
|
|
|
|
|
2011-08-13 09:21:16 +00:00
|
|
|
#ifdef CAPABILITY_MODE
|
|
|
|
/*
|
2016-11-02 12:43:15 +00:00
|
|
|
* In capability mode, lookups must be restricted to happen in
|
|
|
|
* the subtree with the root specified by the file descriptor:
|
|
|
|
* - The root must be real file descriptor, not the pseudo-descriptor
|
|
|
|
* AT_FDCWD.
|
|
|
|
* - The passed path must be relative and not absolute.
|
|
|
|
* - If lookup_cap_dotdot is disabled, path must not contain the
|
|
|
|
* '..' components.
|
|
|
|
* - If lookup_cap_dotdot is enabled, we verify that all '..'
|
|
|
|
* components lookups result in the directories which were
|
|
|
|
* previously walked by us, which prevents an escape from
|
|
|
|
* the relative root.
|
2011-08-13 09:21:16 +00:00
|
|
|
*/
|
2013-08-05 19:42:03 +00:00
|
|
|
if (error == 0 && IN_CAPABILITY_MODE(td) &&
|
|
|
|
(cnp->cn_flags & NOCAPCHECK) == 0) {
|
2016-11-02 12:43:15 +00:00
|
|
|
ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
|
2011-10-18 07:28:58 +00:00
|
|
|
if (ndp->ni_dirfd == AT_FDCWD) {
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_CAPFAIL))
|
2013-09-18 19:26:08 +00:00
|
|
|
ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
|
2011-10-18 07:28:58 +00:00
|
|
|
#endif
|
2011-08-13 09:21:16 +00:00
|
|
|
error = ECAPMODE;
|
2011-10-18 07:28:58 +00:00
|
|
|
}
|
2011-08-13 09:21:16 +00:00
|
|
|
}
|
|
|
|
#endif
|
2015-07-09 17:17:26 +00:00
|
|
|
if (error != 0) {
|
2014-08-24 12:51:12 +00:00
|
|
|
namei_cleanup_cnp(cnp);
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_vp = NULL;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
ndp->ni_loopcnt = 0;
|
|
|
|
#ifdef KTRACE
|
2002-06-07 05:37:18 +00:00
|
|
|
if (KTRPOINT(td, KTR_NAMEI)) {
|
|
|
|
KASSERT(cnp->cn_thread == curthread,
|
|
|
|
("namei not using curthread"));
|
|
|
|
ktrnamei(cnp->cn_pnbuf);
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* Get starting point for the translation.
|
|
|
|
*/
|
Replace custom file descriptor array sleep lock constructed using a mutex
and flags with an sxlock. This leads to a significant and measurable
performance improvement as a result of access to shared locking for
frequent lookup operations, reduced general overhead, and reduced overhead
in the event of contention. All of these are important for threaded
applications where simultaneous access to a shared file descriptor array
occurs frequently. Kris has reported 2x-4x transaction rate improvements
on 8-core MySQL benchmarks; smaller improvements can be expected for many
workloads as a result of reduced overhead.
- Generally eliminate the distinction between "fast" and regular
acquisition of the filedesc lock; the plan is that they will now all
be fast. Change all locking instances to either shared or exclusive
locks.
- Correct a bug (pointed out by kib) in fdfree() where previously msleep()
was called without the mutex held; sx_sleep() is now always called with
the sxlock held exclusively.
- Universally hold the struct file lock over changes to struct file,
rather than the filedesc lock or no lock. Always update the f_ops
field last. A further memory barrier is required here in the future
(discussed with jhb).
- Improve locking and reference management in linux_at(), which fails to
properly acquire vnode references before using vnode pointers. Annotate
improper use of vn_fullpath(), which will be replaced at a future date.
In fcntl(), we conservatively acquire an exclusive lock, even though in
some cases a shared lock may be sufficient, which should be revisited.
The dropping of the filedesc lock in fdgrowtable() is no longer required
as the sxlock can be held over the sleep operation; we should consider
removing that (pointed out by attilio).
Tested by: kris
Discussed with: jhb, kris, attilio, jeff
2007-04-04 09:11:34 +00:00
|
|
|
FILEDESC_SLOCK(fdp);
|
2020-02-01 06:40:35 +00:00
|
|
|
/*
|
|
|
|
* The reference on ni_rootdir is acquired in the block below to avoid
|
|
|
|
* back-to-back atomics for absolute lookups.
|
|
|
|
*/
|
1998-02-15 04:17:09 +00:00
|
|
|
ndp->ni_rootdir = fdp->fd_rdir;
|
1999-09-25 14:14:21 +00:00
|
|
|
ndp->ni_topdir = fdp->fd_jdir;
|
1998-02-15 04:17:09 +00:00
|
|
|
|
2015-07-09 16:32:58 +00:00
|
|
|
startdir_used = 0;
|
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by him. Very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Before if write requested failed, system paniced. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
2008-11-17 20:49:29 +00:00
|
|
|
dp = NULL;
|
2015-07-09 15:06:58 +00:00
|
|
|
cnp->cn_nameptr = cnp->cn_pnbuf;
|
|
|
|
if (cnp->cn_pnbuf[0] == '/') {
|
2019-02-08 04:18:17 +00:00
|
|
|
ndp->ni_resflags |= NIRES_ABS;
|
2020-02-01 06:40:35 +00:00
|
|
|
error = namei_handle_root(ndp, &dp, 2);
|
|
|
|
if (error != 0) {
|
|
|
|
/*
|
|
|
|
* Simplify error handling, we should almost never be
|
|
|
|
* here.
|
|
|
|
*/
|
|
|
|
vrefact(ndp->ni_rootdir);
|
|
|
|
}
|
2015-07-09 15:06:58 +00:00
|
|
|
} else {
|
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by him. Very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Before if write requested failed, system paniced. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
2008-11-17 20:49:29 +00:00
|
|
|
if (ndp->ni_startdir != NULL) {
|
2020-02-01 06:40:35 +00:00
|
|
|
vrefact(ndp->ni_rootdir);
|
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by him. Very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Before if write requested failed, system paniced. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
2008-11-17 20:49:29 +00:00
|
|
|
dp = ndp->ni_startdir;
|
2015-07-09 16:32:58 +00:00
|
|
|
startdir_used = 1;
|
|
|
|
} else if (ndp->ni_dirfd == AT_FDCWD) {
|
|
|
|
dp = fdp->fd_cdir;
|
2020-02-01 06:40:35 +00:00
|
|
|
if (dp == ndp->ni_rootdir) {
|
|
|
|
vrefactn(dp, 2);
|
|
|
|
} else {
|
|
|
|
vrefact(ndp->ni_rootdir);
|
|
|
|
vrefact(dp);
|
|
|
|
}
|
2015-07-09 16:32:58 +00:00
|
|
|
} else {
|
2020-02-01 06:40:35 +00:00
|
|
|
vrefact(ndp->ni_rootdir);
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
rights = ndp->ni_rightsneeded;
|
2020-02-15 01:28:42 +00:00
|
|
|
cap_rights_set_one(&rights, CAP_LOOKUP);
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belong to the same array element this way is
correct, but is not advised. It should only be used for alias definitions.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
|
2009-07-28 21:39:58 +00:00
|
|
|
if (cnp->cn_flags & AUDITVNODE1)
|
|
|
|
AUDIT_ARG_ATFD1(ndp->ni_dirfd);
|
|
|
|
if (cnp->cn_flags & AUDITVNODE2)
|
|
|
|
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
|
2020-02-02 16:34:57 +00:00
|
|
|
/*
|
|
|
|
* Effectively inlined fgetvp_rights, because we need to
|
|
|
|
* inspect the file as well as grabbing the vnode.
|
|
|
|
*/
|
|
|
|
error = fget_cap_locked(fdp, ndp->ni_dirfd, &rights,
|
|
|
|
&dfp, &ndp->ni_filecaps);
|
2020-02-03 18:59:07 +00:00
|
|
|
if (error != 0) {
|
|
|
|
/*
|
|
|
|
* Preserve the error; it should either be EBADF
|
|
|
|
* or capability-related, both of which can be
|
|
|
|
* safely returned to the caller.
|
|
|
|
*/
|
|
|
|
} else if (dfp->f_ops == &badfileops) {
|
|
|
|
error = EBADF;
|
|
|
|
} else if (dfp->f_vnode == NULL) {
|
2015-08-12 16:17:00 +00:00
|
|
|
error = ENOTDIR;
|
2020-02-02 16:34:57 +00:00
|
|
|
} else {
|
|
|
|
dp = dfp->f_vnode;
|
|
|
|
vrefact(dp);
|
|
|
|
|
|
|
|
if ((dfp->f_flag & FSEARCH) != 0)
|
|
|
|
cnp->cn_flags |= NOEXECCHECK;
|
|
|
|
}
|
2011-08-13 09:21:16 +00:00
|
|
|
#ifdef CAPABILITIES
|
|
|
|
/*
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrieved with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrieve
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavily modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and even though I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convenient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
* If file descriptor doesn't have all rights,
|
|
|
|
* all lookups relative to it must also be
|
2011-08-13 09:21:16 +00:00
|
|
|
* strictly relative.
|
|
|
|
*/
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belong to the same array element this way is
correct, but is not advised. It should only be used for alias definitions.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
CAP_ALL(&rights);
|
|
|
|
if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights,
|
|
|
|
&rights) ||
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrieved with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrieve
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavily modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and even though I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convenient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
|
|
|
|
ndp->ni_filecaps.fc_nioctls != -1) {
|
2016-11-02 12:43:15 +00:00
|
|
|
ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrieved with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrieve
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavily modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and even though I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convenient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
}
|
2011-08-11 12:30:23 +00:00
|
|
|
#endif
|
2009-07-28 21:39:58 +00:00
|
|
|
}
|
2015-07-09 16:32:58 +00:00
|
|
|
if (error == 0 && dp->v_type != VDIR)
|
|
|
|
error = ENOTDIR;
|
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This brings a huge number of changes; I'll enumerate only the user-visible ones:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by them. Be very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Previously, if a write request failed, the system panicked. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
2008-11-17 20:49:29 +00:00
|
|
|
}
|
2018-11-29 19:13:10 +00:00
|
|
|
if (error == 0 && (cnp->cn_flags & BENEATH) != 0) {
|
2018-11-11 00:04:36 +00:00
|
|
|
if (ndp->ni_dirfd == AT_FDCWD) {
|
|
|
|
ndp->ni_beneath_latch = fdp->fd_cdir;
|
|
|
|
vrefact(ndp->ni_beneath_latch);
|
|
|
|
} else {
|
|
|
|
rights = ndp->ni_rightsneeded;
|
2020-02-15 01:28:42 +00:00
|
|
|
cap_rights_set_one(&rights, CAP_LOOKUP);
|
2018-11-11 00:04:36 +00:00
|
|
|
error = fgetvp_rights(td, ndp->ni_dirfd, &rights,
|
|
|
|
&dirfd_caps, &ndp->ni_beneath_latch);
|
|
|
|
if (error == 0 && dp->v_type != VDIR) {
|
|
|
|
vrele(ndp->ni_beneath_latch);
|
|
|
|
error = ENOTDIR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (error == 0)
|
|
|
|
ndp->ni_lcf |= NI_LCF_LATCH;
|
|
|
|
}
|
2015-07-09 16:32:58 +00:00
|
|
|
FILEDESC_SUNLOCK(fdp);
|
2020-02-21 01:44:31 +00:00
|
|
|
/*
|
|
|
|
* If we are auditing the kernel pathname, save the user pathname.
|
|
|
|
*/
|
|
|
|
if (cnp->cn_flags & AUDITVNODE1)
|
|
|
|
AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
|
|
|
|
if (cnp->cn_flags & AUDITVNODE2)
|
|
|
|
AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
|
2015-07-09 16:32:58 +00:00
|
|
|
if (ndp->ni_startdir != NULL && !startdir_used)
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
if (error != 0) {
|
|
|
|
if (dp != NULL)
|
|
|
|
vrele(dp);
|
2016-08-27 22:43:41 +00:00
|
|
|
goto out;
|
2008-03-31 12:01:21 +00:00
|
|
|
}
|
2018-11-29 19:13:10 +00:00
|
|
|
MPASS((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | NI_LCF_LATCH)) !=
|
|
|
|
NI_LCF_BENEATH_ABS);
|
2018-10-25 22:16:34 +00:00
|
|
|
if (((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
|
|
|
|
lookup_cap_dotdot != 0) ||
|
|
|
|
((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
|
|
|
|
(cnp->cn_flags & BENEATH) != 0))
|
2016-11-02 12:43:15 +00:00
|
|
|
ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
|
2015-09-28 12:14:16 +00:00
|
|
|
SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
|
|
|
|
cnp->cn_flags);
|
1994-05-24 10:09:53 +00:00
|
|
|
for (;;) {
|
|
|
|
ndp->ni_startdir = dp;
|
1994-09-27 20:33:41 +00:00
|
|
|
error = lookup(ndp);
|
2016-08-27 22:43:41 +00:00
|
|
|
if (error != 0)
|
|
|
|
goto out;
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2009-05-29 09:52:13 +00:00
|
|
|
* If not a symbolic link, we're done.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & ISSYMLINK) == 0) {
|
2015-07-09 15:06:24 +00:00
|
|
|
vrele(ndp->ni_rootdir);
|
2002-07-24 15:42:22 +00:00
|
|
|
if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
|
2014-08-24 12:51:12 +00:00
|
|
|
namei_cleanup_cnp(cnp);
|
2002-07-24 15:42:22 +00:00
|
|
|
} else
|
1994-05-24 10:09:53 +00:00
|
|
|
cnp->cn_flags |= HASBUF;
|
2018-11-11 00:04:36 +00:00
|
|
|
if ((ndp->ni_lcf & (NI_LCF_BENEATH_ABS |
|
|
|
|
NI_LCF_BENEATH_LATCHED)) == NI_LCF_BENEATH_ABS) {
|
|
|
|
NDFREE(ndp, 0);
|
|
|
|
error = ENOTCAPABLE;
|
|
|
|
}
|
|
|
|
nameicap_cleanup(ndp, true);
|
|
|
|
SDT_PROBE2(vfs, namei, lookup, return, error,
|
|
|
|
(error == 0 ? ndp->ni_vp : NULL));
|
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
|
|
|
|
error = ELOOP;
|
|
|
|
break;
|
|
|
|
}
|
2002-08-01 01:21:40 +00:00
|
|
|
#ifdef MAC
|
2002-10-19 21:25:51 +00:00
|
|
|
if ((cnp->cn_flags & NOMACCHECK) == 0) {
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_vnode_check_readlink(td->td_ucred,
|
2002-10-19 21:25:51 +00:00
|
|
|
ndp->ni_vp);
|
2015-07-09 17:17:26 +00:00
|
|
|
if (error != 0)
|
2002-10-19 21:25:51 +00:00
|
|
|
break;
|
|
|
|
}
|
2002-08-01 01:21:40 +00:00
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
if (ndp->ni_pathlen > 1)
|
2003-02-19 05:47:46 +00:00
|
|
|
cp = uma_zalloc(namei_zone, M_WAITOK);
|
1994-05-24 10:09:53 +00:00
|
|
|
else
|
|
|
|
cp = cnp->cn_pnbuf;
|
|
|
|
aiov.iov_base = cp;
|
|
|
|
aiov.iov_len = MAXPATHLEN;
|
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
auio.uio_offset = 0;
|
|
|
|
auio.uio_rw = UIO_READ;
|
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
2013-02-21 19:02:50 +00:00
|
|
|
auio.uio_td = td;
|
1994-05-24 10:09:53 +00:00
|
|
|
auio.uio_resid = MAXPATHLEN;
|
1994-09-27 20:33:41 +00:00
|
|
|
error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
|
2015-07-09 17:17:26 +00:00
|
|
|
if (error != 0) {
|
1994-05-24 10:09:53 +00:00
|
|
|
if (ndp->ni_pathlen > 1)
|
2002-03-20 04:09:59 +00:00
|
|
|
uma_zfree(namei_zone, cp);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
linklen = MAXPATHLEN - auio.uio_resid;
|
2001-06-24 05:24:41 +00:00
|
|
|
if (linklen == 0) {
|
|
|
|
if (ndp->ni_pathlen > 1)
|
2002-03-20 04:09:59 +00:00
|
|
|
uma_zfree(namei_zone, cp);
|
2001-06-24 05:24:41 +00:00
|
|
|
error = ENOENT;
|
|
|
|
break;
|
|
|
|
}
|
2017-11-17 19:25:39 +00:00
|
|
|
if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
|
1994-05-24 10:09:53 +00:00
|
|
|
if (ndp->ni_pathlen > 1)
|
2002-03-20 04:09:59 +00:00
|
|
|
uma_zfree(namei_zone, cp);
|
1994-05-24 10:09:53 +00:00
|
|
|
error = ENAMETOOLONG;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ndp->ni_pathlen > 1) {
|
|
|
|
bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
|
2002-03-20 04:09:59 +00:00
|
|
|
uma_zfree(namei_zone, cnp->cn_pnbuf);
|
1994-05-24 10:09:53 +00:00
|
|
|
cnp->cn_pnbuf = cp;
|
|
|
|
} else
|
|
|
|
cnp->cn_pnbuf[linklen] = '\0';
|
|
|
|
ndp->ni_pathlen += linklen;
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
dp = ndp->ni_dvp;
|
2015-07-09 15:06:58 +00:00
|
|
|
/*
|
|
|
|
* Check if root directory should replace current directory.
|
|
|
|
*/
|
|
|
|
cnp->cn_nameptr = cnp->cn_pnbuf;
|
|
|
|
if (*(cnp->cn_nameptr) == '/') {
|
|
|
|
vrele(dp);
|
2020-02-01 06:40:35 +00:00
|
|
|
error = namei_handle_root(ndp, &dp, 1);
|
2016-08-27 22:43:41 +00:00
|
|
|
if (error != 0)
|
|
|
|
goto out;
|
2015-07-09 15:06:58 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
ndp->ni_vp = NULL;
|
2005-04-09 11:53:16 +00:00
|
|
|
vrele(ndp->ni_dvp);
|
2016-08-27 22:43:41 +00:00
|
|
|
out:
|
|
|
|
vrele(ndp->ni_rootdir);
|
2018-11-11 00:04:36 +00:00
|
|
|
MPASS(error != 0);
|
2016-08-27 22:43:41 +00:00
|
|
|
namei_cleanup_cnp(cnp);
|
2018-11-11 00:04:36 +00:00
|
|
|
nameicap_cleanup(ndp, true);
|
2015-09-28 12:14:16 +00:00
|
|
|
SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2006-09-13 18:39:09 +00:00
|
|
|
/*
 * Adjust the lock flags to use for a lookup on the given mount.
 *
 * mp      - mount being looked up on; may be NULL.
 * lkflags - requested lock flags.
 * cnflags - componentname flags for the current component.
 *
 * A shared request (LK_SHARED) is converted to LK_EXCLUSIVE when mp is
 * NULL, when the mount does not have MNTK_LOOKUP_SHARED set, or when the
 * component is ".." (ISDOTDOT) and the mount has MNTK_LOOKUP_EXCL_DOTDOT
 * set.  When mp is NULL the conversion is applied regardless of whether
 * LK_SHARED was requested.  LK_NODDLKTREAT is always added.
 *
 * Returns the adjusted lock flags.
 */
static int
|
2012-09-09 19:11:52 +00:00
|
|
|
compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
|
2006-09-13 18:39:09 +00:00
|
|
|
{
|
2008-11-03 19:33:20 +00:00
|
|
|
|
2012-09-09 19:11:52 +00:00
|
|
|
if (mp == NULL || ((lkflags & LK_SHARED) &&
|
|
|
|
(!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
|
|
|
|
((cnflags & ISDOTDOT) &&
|
|
|
|
(mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
|
2006-09-13 18:39:09 +00:00
|
|
|
lkflags &= ~LK_SHARED;
|
|
|
|
lkflags |= LK_EXCLUSIVE;
|
|
|
|
}
|
2014-11-02 13:10:31 +00:00
|
|
|
lkflags |= LK_NODDLKTREAT;
|
2008-11-03 19:33:20 +00:00
|
|
|
return (lkflags);
|
2006-09-13 18:39:09 +00:00
|
|
|
}
|
|
|
|
|
2009-03-11 14:13:47 +00:00
|
|
|
/*
 * Decide whether the leaf vnode of a lookup must be locked exclusively.
 *
 * mp    - mount consulted via MNT_EXTENDED_SHARED() for open() lookups.
 * flags - componentname flags for the current component.
 *
 * Returns 1 if an exclusive lock is required, 0 otherwise:
 *   - 0 unless both ISLASTCN and LOCKLEAF are set;
 *   - 1 if LOCKSHARED is not set;
 *   - for ISOPEN, 1 only when MNT_EXTENDED_SHARED(mp) is false;
 *   - 0 in all remaining cases.
 */
static __inline int
|
|
|
|
needs_exclusive_leaf(struct mount *mp, int flags)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Intermediate nodes can use shared locks, we only need to
|
|
|
|
* force an exclusive lock for leaf nodes.
|
|
|
|
*/
|
|
|
|
if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
/* Always use exclusive locks if LOCKSHARED isn't set. */
|
|
|
|
if (!(flags & LOCKSHARED))
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For lookups during open(), if the mount point supports
|
|
|
|
* extended shared operations, then use a shared lock for the
|
|
|
|
* leaf node, otherwise use an exclusive lock.
|
|
|
|
*/
|
2013-11-09 20:30:13 +00:00
|
|
|
if ((flags & ISOPEN) != 0)
|
|
|
|
return (!MNT_EXTENDED_SHARED(mp));
|
2009-03-11 14:13:47 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Lookup requests outside of open() that specify LOCKSHARED
|
|
|
|
* only need a shared lock on the leaf vnode.
|
|
|
|
*/
|
2009-03-11 14:39:55 +00:00
|
|
|
return (0);
|
2009-03-11 14:13:47 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Search a pathname.
|
|
|
|
* This is a very central and rather complicated routine.
|
|
|
|
*
|
|
|
|
* The pathname is pointed to by ni_ptr and is of length ni_pathlen.
|
|
|
|
* The starting directory is taken from ni_startdir. The pathname is
|
|
|
|
* descended until done, or a symbolic link is encountered. The variable
|
|
|
|
* ni_more is clear if the path is completed; it is set to one if a
|
|
|
|
* symbolic link needing interpretation is encountered.
|
|
|
|
*
|
|
|
|
* The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
|
|
|
|
* whether the name is to be looked up, created, renamed, or deleted.
|
|
|
|
* When CREATE, RENAME, or DELETE is specified, information usable in
|
|
|
|
* creating, renaming, or deleting a directory entry may be calculated.
|
|
|
|
* If flag has LOCKPARENT or'ed into it, the parent directory is returned
|
|
|
|
* locked. If flag has WANTPARENT or'ed into it, the parent directory is
|
|
|
|
* returned unlocked. Otherwise the parent directory is not returned. If
|
|
|
|
* the target of the pathname exists and LOCKLEAF is or'ed into the flag
|
|
|
|
* the target is returned locked, otherwise it is returned unlocked.
|
|
|
|
* When creating or renaming and LOCKPARENT is specified, the target may not
|
|
|
|
* be ".". When deleting and LOCKPARENT is specified, the target may be ".".
|
1995-05-30 08:16:23 +00:00
|
|
|
*
|
1994-05-24 10:09:53 +00:00
|
|
|
* Overall outline of lookup:
|
|
|
|
*
|
|
|
|
* dirloop:
|
|
|
|
* identify next component of name at ndp->ni_ptr
|
|
|
|
* handle degenerate case where name is null string
|
|
|
|
* if .. and crossing mount points and on mounted filesys, find parent
|
|
|
|
* call VOP_LOOKUP routine for next component name
|
|
|
|
* directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
|
|
|
|
* component vnode returned in ni_vp (if it exists), locked.
|
|
|
|
* if result vnode is mounted on and crossing mount points,
|
|
|
|
* find mounted on vnode
|
|
|
|
* if more components of name, do next level at dirloop
|
|
|
|
* return the answer in ni_vp, locked if LOCKLEAF set
|
|
|
|
* if LOCKPARENT set, return locked parent in ni_dvp
|
|
|
|
* if WANTPARENT set, return unlocked parent in ni_dvp
|
|
|
|
*/
|
|
|
|
/*
 * lookup(ndp): resolve the pathname held in ndp one component at a time.
 * Returns 0 on success or an errno value; on failure ndp->ni_vp is NULL.
 */
int
|
2006-08-05 21:40:59 +00:00
|
|
|
lookup(struct nameidata *ndp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2017-01-04 14:43:57 +00:00
|
|
|
char *cp; /* pointer into pathname argument */
|
|
|
|
char *prev_ni_next; /* saved ndp->ni_next */
|
2016-04-15 16:10:11 +00:00
|
|
|
struct vnode *dp = NULL; /* the directory we are searching */
|
1994-05-24 10:09:53 +00:00
|
|
|
struct vnode *tdp; /* saved dp */
|
|
|
|
struct mount *mp; /* mount table entry */
|
2009-05-27 14:11:23 +00:00
|
|
|
struct prison *pr; /* walks the credential prison chain when handling ".." */
|
2017-01-04 14:43:57 +00:00
|
|
|
size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */
|
1994-05-24 10:09:53 +00:00
|
|
|
int docache; /* == 0 do not cache last component */
|
|
|
|
int wantparent; /* 1 => wantparent or lockparent flag */
|
|
|
|
int rdonly; /* lookup read-only flag bit */
|
|
|
|
int error = 0;
|
2000-09-13 08:57:56 +00:00
|
|
|
int dpunlocked = 0; /* dp has already been unlocked */
|
2016-03-12 07:54:42 +00:00
|
|
|
int relookup = 0; /* do not consume the path component */
|
1994-05-24 10:09:53 +00:00
|
|
|
struct componentname *cnp = &ndp->ni_cnd;
|
2006-09-13 18:39:09 +00:00
|
|
|
int lkflags_save; /* cn_lkflags saved around the VOP_LOOKUP() call */
|
2012-01-01 18:45:59 +00:00
|
|
|
int ni_dvp_unlocked; /* 0 => ni_dvp untouched, 1 => unlocked here, 2 => already released here */
|
2006-09-13 18:39:09 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
|
/*
|
|
|
|
* Setup: break out flag bits into variables.
|
|
|
|
*/
|
2012-01-01 18:45:59 +00:00
|
|
|
ni_dvp_unlocked = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
|
2005-03-28 13:56:56 +00:00
|
|
|
KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
|
|
|
|
("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * docache is nonzero unless NOCACHE is set.  It is then forced to 0 for
 * DELETE, and for any operation other than CREATE or LOOKUP when the
 * parent is wanted.
 */
docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
|
|
|
|
if (cnp->cn_nameiop == DELETE ||
|
1997-02-18 06:46:41 +00:00
|
|
|
(wantparent && cnp->cn_nameiop != CREATE &&
|
|
|
|
cnp->cn_nameiop != LOOKUP))
|
1994-05-24 10:09:53 +00:00
|
|
|
docache = 0;
|
|
|
|
rdonly = cnp->cn_flags & RDONLY;
|
|
|
|
cnp->cn_flags &= ~ISSYMLINK;
|
2005-03-29 10:07:15 +00:00
|
|
|
ndp->ni_dvp = NULL;
|
|
|
|
/*
|
|
|
|
* We use shared locks until we hit the parent of the last cn then
|
|
|
|
* we adjust based on the requesting flags.
|
|
|
|
*/
|
2018-09-20 18:25:26 +00:00
|
|
|
cnp->cn_lkflags = LK_SHARED;
|
1994-05-24 10:09:53 +00:00
|
|
|
/* dp takes over the starting directory; ni_startdir is cleared. */
dp = ndp->ni_startdir;
|
|
|
|
ndp->ni_startdir = NULLVP;
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(dp,
|
2012-09-09 19:11:52 +00:00
|
|
|
compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
|
|
|
|
cnp->cn_flags));
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
dirloop:
|
|
|
|
/*
|
|
|
|
* Search a new directory.
|
|
|
|
*
|
|
|
|
* The last component of the filename is left accessible via
|
|
|
|
* cnp->cn_nameptr for callers that need the name. Callers needing
|
|
|
|
* the name set the SAVENAME flag. When done, they assume
|
|
|
|
* responsibility for freeing the pathname buffer.
|
|
|
|
*/
|
|
|
|
for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
|
1999-10-03 12:18:29 +00:00
|
|
|
continue;
|
1994-05-24 10:09:53 +00:00
|
|
|
cnp->cn_namelen = cp - cnp->cn_nameptr;
|
|
|
|
if (cnp->cn_namelen > NAME_MAX) {
|
|
|
|
error = ENAMETOOLONG;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
|
|
|
{ char c = *cp;
|
|
|
|
*cp = '\0';
|
|
|
|
printf("{%s}: ", cnp->cn_nameptr);
|
|
|
|
*cp = c; }
|
|
|
|
#endif
|
2017-01-04 14:43:57 +00:00
|
|
|
/*
 * Remember the path cursor (ni_pathlen, ni_next) so that it can be
 * restored if this component has to be looked up again.
 */
prev_ni_pathlen = ndp->ni_pathlen;
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_pathlen -= cnp->cn_namelen;
|
2017-01-04 14:43:57 +00:00
|
|
|
KASSERT(ndp->ni_pathlen <= PATH_MAX,
|
|
|
|
("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen));
|
|
|
|
prev_ni_next = ndp->ni_next;
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_next = cp;
|
1995-07-31 00:35:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Replace multiple slashes by a single slash and trailing slashes
|
|
|
|
* by a null. This must be done before VOP_LOOKUP() because some
|
|
|
|
* fs's don't know about trailing slashes. Remember if there were
|
|
|
|
* trailing slashes to handle symlinks, existing non-directories
|
|
|
|
* and non-existing files that won't be directories specially later.
|
|
|
|
*/
|
|
|
|
while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
|
|
|
|
cp++;
|
|
|
|
ndp->ni_pathlen--;
|
|
|
|
if (*cp == '\0') {
|
2009-06-06 00:49:49 +00:00
|
|
|
*ndp->ni_next = '\0';
|
2009-05-29 10:02:44 +00:00
|
|
|
cnp->cn_flags |= TRAILINGSLASH;
|
1995-07-31 00:35:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
ndp->ni_next = cp;
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * Derive the per-component flags: MAKEENTRY (cleared for the final
 * component when docache is 0), ISDOTDOT and ISLASTCN.
 */
cnp->cn_flags |= MAKEENTRY;
|
|
|
|
if (*cp == '\0' && docache == 0)
|
|
|
|
cnp->cn_flags &= ~MAKEENTRY;
|
|
|
|
if (cnp->cn_namelen == 2 &&
|
|
|
|
cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
|
|
|
|
cnp->cn_flags |= ISDOTDOT;
|
|
|
|
else
|
|
|
|
cnp->cn_flags &= ~ISDOTDOT;
|
|
|
|
if (*ndp->ni_next == 0)
|
|
|
|
cnp->cn_flags |= ISLASTCN;
|
|
|
|
else
|
|
|
|
cnp->cn_flags &= ~ISLASTCN;
|
|
|
|
|
2009-11-10 11:50:37 +00:00
|
|
|
/* Refuse DELETE or RENAME when the last component is ".". */
if ((cnp->cn_flags & ISLASTCN) != 0 &&
|
|
|
|
cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
|
|
|
|
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
2016-11-02 12:43:15 +00:00
|
|
|
nameicap_tracker_add(ndp, dp);
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Check for degenerate name (e.g. / or "")
|
|
|
|
* which is a way of talking about a directory,
|
|
|
|
* e.g. like "/." or ".".
|
|
|
|
*/
|
|
|
|
if (cnp->cn_nameptr[0] == '\0') {
|
|
|
|
if (dp->v_type != VDIR) {
|
|
|
|
error = ENOTDIR;
|
|
|
|
goto bad;
|
|
|
|
}
|
1997-02-10 02:22:35 +00:00
|
|
|
if (cnp->cn_nameiop != LOOKUP) {
|
|
|
|
error = EISDIR;
|
|
|
|
goto bad;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
if (wantparent) {
|
|
|
|
ndp->ni_dvp = dp;
|
|
|
|
VREF(dp);
|
|
|
|
}
|
|
|
|
ndp->ni_vp = dp;
|
2006-02-05 15:42:01 +00:00
|
|
|
|
|
|
|
if (cnp->cn_flags & AUDITVNODE1)
|
2009-07-28 21:52:24 +00:00
|
|
|
AUDIT_ARG_VNODE1(dp);
|
2006-02-05 15:42:01 +00:00
|
|
|
else if (cnp->cn_flags & AUDITVNODE2)
|
2009-07-28 21:52:24 +00:00
|
|
|
AUDIT_ARG_VNODE2(dp);
|
2006-02-05 15:42:01 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
1999-12-15 23:02:35 +00:00
|
|
|
/* XXX This should probably move to the top of function. */
|
1994-05-24 10:09:53 +00:00
|
|
|
if (cnp->cn_flags & SAVESTART)
|
|
|
|
panic("lookup: SAVESTART");
|
2005-01-24 10:27:05 +00:00
|
|
|
goto success;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-08-13 09:21:16 +00:00
|
|
|
* Handle "..": six special cases (numbered 0-5 below).
|
2016-11-02 12:43:15 +00:00
|
|
|
* 0. If doing a capability lookup and lookup_cap_dotdot is
|
|
|
|
* disabled, return ENOTCAPABLE.
|
2006-01-21 19:57:56 +00:00
|
|
|
* 1. Return an error if this is the last component of
|
|
|
|
* the name and the operation is DELETE or RENAME.
|
|
|
|
* 2. If at root directory (e.g. after chroot)
|
1994-05-24 10:09:53 +00:00
|
|
|
* or at absolute root directory
|
|
|
|
* then ignore it so can't get out.
|
2006-01-21 19:57:56 +00:00
|
|
|
* 3. If this vnode is the root of a mounted
|
1994-05-24 10:09:53 +00:00
|
|
|
* filesystem, then replace it with the
|
|
|
|
* vnode which was mounted on so we take the
|
2002-05-16 21:28:32 +00:00
|
|
|
* .. in the other filesystem.
|
2006-01-21 19:57:56 +00:00
|
|
|
* 4. If the vnode is the top directory of
|
1999-09-25 14:14:21 +00:00
|
|
|
* the jail or chroot, don't let them out.
|
2016-11-02 12:43:15 +00:00
|
|
|
* 5. If doing a capability lookup and lookup_cap_dotdot is
|
|
|
|
* enabled, return ENOTCAPABLE if the lookup would escape
|
|
|
|
* from the initial file descriptor directory. Checks are
|
|
|
|
* done by ensuring that namei() already traversed the
|
|
|
|
* result of dotdot lookup.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
if (cnp->cn_flags & ISDOTDOT) {
|
2016-11-02 12:43:15 +00:00
|
|
|
if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
|
|
|
|
== NI_LCF_STRICTRELATIVE) {
|
2011-10-18 07:28:58 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(curthread, KTR_CAPFAIL))
|
2013-09-18 19:26:08 +00:00
|
|
|
ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
|
2011-10-18 07:28:58 +00:00
|
|
|
#endif
|
2011-08-13 09:21:16 +00:00
|
|
|
error = ENOTCAPABLE;
|
|
|
|
goto bad;
|
|
|
|
}
|
2006-01-21 19:57:56 +00:00
|
|
|
if ((cnp->cn_flags & ISLASTCN) != 0 &&
|
|
|
|
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
|
2006-01-22 19:37:02 +00:00
|
|
|
error = EINVAL;
|
2006-01-21 19:57:56 +00:00
|
|
|
goto bad;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * Each pass either stops at a boundary (dp is a root directory, a
 * prison root, or a filesystem root with NOCROSSMOUNT) and returns dp
 * itself as the result, leaves the loop because dp is not the root of
 * a mounted filesystem, or steps from dp to the vnode it covers.
 */
for (;;) {
|
2009-05-27 14:11:23 +00:00
|
|
|
for (pr = cnp->cn_cred->cr_prison; pr != NULL;
|
|
|
|
pr = pr->pr_parent)
|
|
|
|
if (dp == pr->pr_root)
|
|
|
|
break;
|
1999-09-25 14:14:21 +00:00
|
|
|
if (dp == ndp->ni_rootdir ||
|
|
|
|
dp == ndp->ni_topdir ||
|
2007-02-15 09:53:49 +00:00
|
|
|
dp == rootvnode ||
|
2009-05-27 14:11:23 +00:00
|
|
|
pr != NULL ||
|
2007-02-15 09:53:49 +00:00
|
|
|
((dp->v_vflag & VV_ROOT) != 0 &&
|
|
|
|
(cnp->cn_flags & NOCROSSMOUNT) != 0)) {
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_dvp = dp;
|
|
|
|
ndp->ni_vp = dp;
|
|
|
|
VREF(dp);
|
|
|
|
goto nextname;
|
|
|
|
}
|
2007-02-15 09:53:49 +00:00
|
|
|
if ((dp->v_vflag & VV_ROOT) == 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
2019-12-08 21:30:04 +00:00
|
|
|
if (VN_IS_DOOMED(dp)) { /* forced unmount */
|
2009-03-24 18:16:42 +00:00
|
|
|
error = ENOENT;
|
2000-11-30 20:04:44 +00:00
|
|
|
goto bad;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
tdp = dp;
|
|
|
|
dp = dp->v_mount->mnt_vnodecovered;
|
|
|
|
VREF(dp);
|
2005-04-09 11:53:16 +00:00
|
|
|
vput(tdp);
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(dp,
|
|
|
|
compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
|
2012-09-09 19:11:52 +00:00
|
|
|
LK_RETRY, ISDOTDOT));
|
2016-11-02 12:43:15 +00:00
|
|
|
error = nameicap_check_dotdot(ndp, dp);
|
|
|
|
if (error != 0) {
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(curthread, KTR_CAPFAIL))
|
|
|
|
ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
|
|
|
|
#endif
|
|
|
|
goto bad;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We now have a segment name to search for, and a directory to search.
|
|
|
|
*/
|
|
|
|
unionlookup:
|
2002-08-01 01:21:40 +00:00
|
|
|
#ifdef MAC
|
2020-02-13 22:22:55 +00:00
|
|
|
error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, cnp);
|
|
|
|
if (error)
|
|
|
|
goto bad;
|
2002-08-01 01:21:40 +00:00
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_dvp = dp;
|
1997-02-10 02:22:35 +00:00
|
|
|
ndp->ni_vp = NULL;
|
1997-04-04 17:46:21 +00:00
|
|
|
ASSERT_VOP_LOCKED(dp, "lookup");
|
2005-03-29 10:07:15 +00:00
|
|
|
/*
|
|
|
|
* If we have a shared lock we may need to upgrade the lock for the
|
|
|
|
* last operation.
|
|
|
|
*/
|
2016-11-30 02:14:53 +00:00
|
|
|
if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) &&
|
|
|
|
dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED)
|
2016-11-30 02:17:03 +00:00
|
|
|
vn_lock(dp, LK_UPGRADE|LK_RETRY);
|
2019-12-08 21:30:04 +00:00
|
|
|
if (VN_IS_DOOMED(dp)) {
|
2013-04-01 09:59:38 +00:00
|
|
|
error = ENOENT;
|
|
|
|
goto bad;
|
|
|
|
}
|
2005-03-29 10:07:15 +00:00
|
|
|
/*
|
|
|
|
* If we're looking up the last component and we need an exclusive
|
|
|
|
* lock, adjust our lkflags.
|
|
|
|
*/
|
2009-03-11 14:13:47 +00:00
|
|
|
if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
|
2005-03-29 10:07:15 +00:00
|
|
|
cnp->cn_lkflags = LK_EXCLUSIVE;
|
2004-12-03 12:15:39 +00:00
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
2016-08-10 16:12:31 +00:00
|
|
|
vn_printf(dp, "lookup in ");
|
2004-12-03 12:15:39 +00:00
|
|
|
#endif
|
2006-09-13 18:39:09 +00:00
|
|
|
/*
 * cn_lkflags is adjusted by compute_cn_lkflags() only for the duration
 * of VOP_LOOKUP() and restored immediately afterwards.
 */
lkflags_save = cnp->cn_lkflags;
|
2012-09-09 19:11:52 +00:00
|
|
|
cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
|
|
|
|
cnp->cn_flags);
|
2016-03-12 09:05:43 +00:00
|
|
|
error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp);
|
|
|
|
cnp->cn_lkflags = lkflags_save;
|
|
|
|
if (error != 0) {
|
1999-01-08 17:31:30 +00:00
|
|
|
KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
|
|
|
printf("not found\n");
|
|
|
|
#endif
|
|
|
|
/*
 * ENOENT at the root of a MNT_UNION mount: retry the same name in the
 * covered vnode.
 */
if ((error == ENOENT) &&
|
2002-08-04 10:29:36 +00:00
|
|
|
(dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
|
1994-05-24 10:09:53 +00:00
|
|
|
(dp->v_mount->mnt_flag & MNT_UNION)) {
|
|
|
|
tdp = dp;
|
|
|
|
dp = dp->v_mount->mnt_vnodecovered;
|
|
|
|
VREF(dp);
|
2005-04-09 11:53:16 +00:00
|
|
|
vput(tdp);
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(dp,
|
|
|
|
compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
|
2012-09-09 19:11:52 +00:00
|
|
|
LK_RETRY, cnp->cn_flags));
|
2016-11-02 12:43:15 +00:00
|
|
|
nameicap_tracker_add(ndp, dp);
|
1994-05-24 10:09:53 +00:00
|
|
|
goto unionlookup;
|
|
|
|
}
|
|
|
|
|
2016-03-12 07:54:42 +00:00
|
|
|
/*
 * ERELOOKUP: report dp itself as the result with an extra reference
 * and set relookup, so that the code at nextname restores the path
 * cursor and processes the same component again.
 */
if (error == ERELOOKUP) {
|
|
|
|
vref(dp);
|
|
|
|
ndp->ni_vp = dp;
|
|
|
|
error = 0;
|
|
|
|
relookup = 1;
|
|
|
|
goto good;
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
if (error != EJUSTRETURN)
|
|
|
|
goto bad;
|
|
|
|
/*
|
2009-06-06 00:49:49 +00:00
|
|
|
* At this point, we know we're at the end of the
|
|
|
|
* pathname. If creating / renaming, we can consider
|
|
|
|
* allowing the file or directory to be created / renamed,
|
|
|
|
* provided we're not on a read-only filesystem.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
1995-10-22 09:32:48 +00:00
|
|
|
if (rdonly) {
|
1994-05-24 10:09:53 +00:00
|
|
|
error = EROFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
2009-06-06 00:49:49 +00:00
|
|
|
/* trailing slash only allowed for directories */
|
|
|
|
if ((cnp->cn_flags & TRAILINGSLASH) &&
|
|
|
|
!(cnp->cn_flags & WILLBEDIR)) {
|
1995-07-31 00:35:58 +00:00
|
|
|
error = ENOENT;
|
|
|
|
goto bad;
|
|
|
|
}
|
2005-03-28 09:24:50 +00:00
|
|
|
if ((cnp->cn_flags & LOCKPARENT) == 0)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* We return with ni_vp NULL to indicate that the entry
|
|
|
|
* doesn't currently exist, leaving a pointer to the
|
2006-08-05 21:08:47 +00:00
|
|
|
* (possibly locked) directory vnode in ndp->ni_dvp.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
if (cnp->cn_flags & SAVESTART) {
|
|
|
|
ndp->ni_startdir = ndp->ni_dvp;
|
|
|
|
VREF(ndp->ni_startdir);
|
|
|
|
}
|
2005-01-24 10:27:05 +00:00
|
|
|
goto success;
|
2016-03-12 09:05:43 +00:00
|
|
|
}
|
2016-03-12 07:54:42 +00:00
|
|
|
|
|
|
|
good:
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
|
|
|
printf("found\n");
|
|
|
|
#endif
|
|
|
|
dp = ndp->ni_vp;
|
1995-07-31 00:35:58 +00:00
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Check to see if the vnode has been mounted on;
|
2002-05-16 21:28:32 +00:00
|
|
|
* if so find the root of the mounted filesystem.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
|
|
|
|
(cnp->cn_flags & NOCROSSMOUNT) == 0) {
|
2008-11-02 10:15:42 +00:00
|
|
|
/* A nonzero vfs_busy() return re-tests the loop condition. */
if (vfs_busy(mp, 0))
|
1994-05-24 10:09:53 +00:00
|
|
|
continue;
|
2005-04-09 11:53:16 +00:00
|
|
|
vput(dp);
|
2006-04-28 00:59:48 +00:00
|
|
|
if (dp != ndp->ni_dvp)
|
Below is slightly edited description of the LOR by Tor Egge:
--------------------------
[Deadlock] is caused by a lock order reversal in vfs_lookup(), where
[some] process is trying to lock a directory vnode, that is the parent
directory of covered vnode) while holding an exclusive vnode lock on
covering vnode.
A simplified scenario:
root fs var fs
/ A / (/var) D
/var B /log (/var/log) E
vfs lock C vfs lock F
Within each file system, the lock order is clear: C->A->B and F->D->E
When traversing across mounts, the system can choose between two lock orders,
but everything must then follow that lock order:
L1: C->A->B
|
+->F->D->E
L2: F->D->E
|
+->C->A->B
The lookup() process for namei("/var") mixes those two lock orders:
VOP_LOOKUP() obtains B while A is held
vfs_busy() obtains a shared lock on F while A and B are held (follows L1,
violates L2)
vput() releases lock on B
VOP_UNLOCK() releases lock on A
VFS_ROOT() obtains lock on D while shared lock on F is held
vfs_unbusy() releases shared lock on F
vn_lock() obtains lock on A while D is held (violates L1, follows L2)
dounmount() follows L1 (B is locked while F is drained).
Without unmount activity, vfs_busy() will always succeed without blocking
and the deadlock isn't triggered (the system behaves as if L2 is followed).
With unmount, you can get 4 processes in a deadlock:
p1: holds D, want A (in lookup())
p2: holds shared lock on F, want D (in VFS_ROOT())
p3: holds B, want drain lock on F (in dounmount())
p4: holds A, want B (in VOP_LOOKUP())
You can have more than one instance of p2.
The reversal was introduced in revision 1.81 of src/sys/kern/vfs_lookup.c and
MFCed to revision 1.80.2.1, probably to avoid a cascade of vnode locks when nfs
servers are dead (VFS_ROOT() just hangs) spreading to the root fs root vnode.
- Tor Egge
To fix the LOR, ups@ noted that when crossing the mount point, ni_dvp
is actually not used by the callers of namei. Thus, placeholder deadfs
vnode vp_crossmp is introduced that is filled into ni_dvp.
Idea by: ups
Reviewed by: tegge, ups, jeff, rwatson (mac interaction)
Tested by: Peter Holm
MFC after: 2 weeks
2007-01-22 11:25:22 +00:00
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vrele(ndp->ni_dvp);
|
2016-12-12 15:37:11 +00:00
|
|
|
vrefact(vp_crossmp);
|
Below is slightly edited description of the LOR by Tor Egge:
--------------------------
[Deadlock] is caused by a lock order reversal in vfs_lookup(), where
[some] process is trying to lock a directory vnode, that is the parent
directory of covered vnode) while holding an exclusive vnode lock on
covering vnode.
A simplified scenario:
root fs var fs
/ A / (/var) D
/var B /log (/var/log) E
vfs lock C vfs lock F
Within each file system, the lock order is clear: C->A->B and F->D->E
When traversing across mounts, the system can choose between two lock orders,
but everything must then follow that lock order:
L1: C->A->B
|
+->F->D->E
L2: F->D->E
|
+->C->A->B
The lookup() process for namei("/var") mixes those two lock orders:
VOP_LOOKUP() obtains B while A is held
vfs_busy() obtains a shared lock on F while A and B are held (follows L1,
violates L2)
vput() releases lock on B
VOP_UNLOCK() releases lock on A
VFS_ROOT() obtains lock on D while shared lock on F is held
vfs_unbusy() releases shared lock on F
vn_lock() obtains lock on A while D is held (violates L1, follows L2)
dounmount() follows L1 (B is locked while F is drained).
Without unmount activity, vfs_busy() will always succeed without blocking
and the deadlock isn't triggered (the system behaves as if L2 is followed).
With unmount, you can get 4 processes in a deadlock:
p1: holds D, want A (in lookup())
p2: holds shared lock on F, want D (in VFS_ROOT())
p3: holds B, want drain lock on F (in dounmount())
p4: holds A, want B (in VOP_LOOKUP())
You can have more than one instance of p2.
The reversal was introduced in revision 1.81 of src/sys/kern/vfs_lookup.c and
MFCed to revision 1.80.2.1, probably to avoid a cascade of vnode locks when nfs
servers are dead (VFS_ROOT() just hangs) spreading to the root fs root vnode.
- Tor Egge
To fix the LOR, ups@ noted that when crossing the mount point, ni_dvp
is actually not used by the callers of namei. Thus, placeholder deadfs
vnode vp_crossmp is introduced that is filled into ni_dvp.
Idea by: ups
Reviewed by: tegge, ups, jeff, rwatson (mac interaction)
Tested by: Peter Holm
MFC after: 2 weeks
2007-01-22 11:25:22 +00:00
|
|
|
ndp->ni_dvp = vp_crossmp;
|
2012-09-09 19:11:52 +00:00
|
|
|
error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
|
|
|
|
cnp->cn_flags), &tdp);
|
2008-08-31 14:26:08 +00:00
|
|
|
vfs_unbusy(mp);
|
2008-01-10 01:10:58 +00:00
|
|
|
if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
|
Below is slightly edited description of the LOR by Tor Egge:
--------------------------
[Deadlock] is caused by a lock order reversal in vfs_lookup(), where
[some] process is trying to lock a directory vnode, that is the parent
directory of covered vnode) while holding an exclusive vnode lock on
covering vnode.
A simplified scenario:
root fs var fs
/ A / (/var) D
/var B /log (/var/log) E
vfs lock C vfs lock F
Within each file system, the lock order is clear: C->A->B and F->D->E
When traversing across mounts, the system can choose between two lock orders,
but everything must then follow that lock order:
L1: C->A->B
|
+->F->D->E
L2: F->D->E
|
+->C->A->B
The lookup() process for namei("/var") mixes those two lock orders:
VOP_LOOKUP() obtains B while A is held
vfs_busy() obtains a shared lock on F while A and B are held (follows L1,
violates L2)
vput() releases lock on B
VOP_UNLOCK() releases lock on A
VFS_ROOT() obtains lock on D while shared lock on F is held
vfs_unbusy() releases shared lock on F
vn_lock() obtains lock on A while D is held (violates L1, follows L2)
dounmount() follows L1 (B is locked while F is drained).
Without unmount activity, vfs_busy() will always succeed without blocking
and the deadlock isn't triggered (the system behaves as if L2 is followed).
With unmount, you can get 4 processes in a deadlock:
p1: holds D, want A (in lookup())
p2: holds shared lock on F, want D (in VFS_ROOT())
p3: holds B, want drain lock on F (in dounmount())
p4: holds A, want B (in VOP_LOOKUP())
You can have more than one instance of p2.
The reversal was introduced in revision 1.81 of src/sys/kern/vfs_lookup.c and
MFCed to revision 1.80.2.1, probably to avoid a cascade of vnode locks when nfs
servers are dead (VFS_ROOT() just hangs) spreading to the root fs root vnode.
- Tor Egge
To fix the LOR, ups@ noted that when crossing the mount point, ni_dvp
is actually not used by the callers of namei. Thus, placeholder deadfs
vnode vp_crossmp is introduced that is filled into ni_dvp.
Idea by: ups
Reviewed by: tegge, ups, jeff, rwatson (mac interaction)
Tested by: Peter Holm
MFC after: 2 weeks
2007-01-22 11:25:22 +00:00
|
|
|
panic("vp_crossmp exclusively locked or reclaimed");
|
2000-09-13 08:57:56 +00:00
|
|
|
if (error) {
|
|
|
|
dpunlocked = 1;
|
1994-05-24 10:09:53 +00:00
|
|
|
goto bad2;
|
2000-09-13 08:57:56 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_vp = dp = tdp;
|
|
|
|
}
|
|
|
|
|
1995-08-24 10:17:39 +00:00
|
|
|
/*
|
|
|
|
* Check for symbolic link
|
|
|
|
*/
|
|
|
|
if ((dp->v_type == VLNK) &&
|
2009-06-06 00:49:49 +00:00
|
|
|
((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
|
1995-08-24 10:17:39 +00:00
|
|
|
*ndp->ni_next == '/')) {
|
|
|
|
cnp->cn_flags |= ISSYMLINK;
|
2019-12-08 21:30:04 +00:00
|
|
|
if (VN_IS_DOOMED(dp)) {
|
2009-03-24 18:16:42 +00:00
|
|
|
/*
|
|
|
|
* We can't know whether the directory was mounted with
|
|
|
|
* NOSYMFOLLOW, so we can't follow safely.
|
|
|
|
*/
|
|
|
|
error = ENOENT;
|
2000-11-30 20:04:44 +00:00
|
|
|
goto bad2;
|
|
|
|
}
|
1998-04-08 18:31:59 +00:00
|
|
|
if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
|
|
|
|
error = EACCES;
|
|
|
|
goto bad2;
|
|
|
|
}
|
2005-04-09 11:53:16 +00:00
|
|
|
/*
|
|
|
|
* Symlink code always expects an unlocked dvp.
|
|
|
|
*/
|
2012-01-01 18:45:59 +00:00
|
|
|
if (ndp->ni_dvp != ndp->ni_vp) {
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(ndp->ni_dvp);
|
2012-01-01 18:45:59 +00:00
|
|
|
ni_dvp_unlocked = 1;
|
|
|
|
}
|
2005-01-24 10:27:05 +00:00
|
|
|
goto success;
|
1995-08-24 10:17:39 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
nextname:
|
|
|
|
/*
|
2009-06-06 00:49:49 +00:00
|
|
|
* Not a symbolic link that we will follow. Continue with the
|
|
|
|
* next component if there is any; otherwise, we're done.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2005-03-28 09:24:50 +00:00
|
|
|
KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
|
|
|
|
("lookup: invalid path state."));
|
2016-03-12 07:54:42 +00:00
|
|
|
/*
 * ERELOOKUP case: restore the path cursor saved at dirloop, release
 * ni_dvp and process the same component again.
 */
if (relookup) {
|
|
|
|
relookup = 0;
|
2017-01-04 14:43:57 +00:00
|
|
|
ndp->ni_pathlen = prev_ni_pathlen;
|
|
|
|
ndp->ni_next = prev_ni_next;
|
2016-03-12 07:54:42 +00:00
|
|
|
if (ndp->ni_dvp != dp)
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
goto dirloop;
|
|
|
|
}
|
2016-11-02 12:43:15 +00:00
|
|
|
if (cnp->cn_flags & ISDOTDOT) {
|
|
|
|
error = nameicap_check_dotdot(ndp, ndp->ni_vp);
|
|
|
|
if (error != 0) {
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(curthread, KTR_CAPFAIL))
|
|
|
|
ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
|
|
|
|
#endif
|
|
|
|
goto bad2;
|
|
|
|
}
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * More components follow: skip the separating slashes, release the
 * parent and continue the walk with dp as the directory to search.
 */
if (*ndp->ni_next == '/') {
|
|
|
|
cnp->cn_nameptr = ndp->ni_next;
|
|
|
|
while (*cnp->cn_nameptr == '/') {
|
|
|
|
cnp->cn_nameptr++;
|
|
|
|
ndp->ni_pathlen--;
|
|
|
|
}
|
2005-04-09 11:53:16 +00:00
|
|
|
if (ndp->ni_dvp != dp)
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vrele(ndp->ni_dvp);
|
1994-05-24 10:09:53 +00:00
|
|
|
goto dirloop;
|
|
|
|
}
|
2009-05-29 10:02:44 +00:00
|
|
|
/*
|
|
|
|
* If we're processing a path with a trailing slash,
|
|
|
|
* check that the end result is a directory.
|
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
|
|
|
|
error = ENOTDIR;
|
|
|
|
goto bad2;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2002-05-16 21:28:32 +00:00
|
|
|
* Disallow directory write attempts on read-only filesystems.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
1995-10-22 09:32:48 +00:00
|
|
|
if (rdonly &&
|
|
|
|
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
|
|
|
|
error = EROFS;
|
|
|
|
goto bad2;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
if (cnp->cn_flags & SAVESTART) {
|
|
|
|
ndp->ni_startdir = ndp->ni_dvp;
|
|
|
|
VREF(ndp->ni_startdir);
|
|
|
|
}
|
2005-04-09 11:53:16 +00:00
|
|
|
/*
 * Put the parent into the requested state: released when no parent is
 * wanted, or only unlocked when it is wanted without LOCKPARENT.
 * ni_dvp_unlocked records which was done, for the bad2 exit.
 */
if (!wantparent) {
|
2012-01-01 18:45:59 +00:00
|
|
|
ni_dvp_unlocked = 2;
|
2005-04-09 11:53:16 +00:00
|
|
|
if (ndp->ni_dvp != dp)
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vrele(ndp->ni_dvp);
|
2012-01-01 18:45:59 +00:00
|
|
|
} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(ndp->ni_dvp);
|
2012-01-01 18:45:59 +00:00
|
|
|
ni_dvp_unlocked = 1;
|
|
|
|
}
|
1997-12-29 00:25:11 +00:00
|
|
2006-02-05 15:42:01 +00:00
|
|
|
if (cnp->cn_flags & AUDITVNODE1)
|
2009-07-28 21:52:24 +00:00
|
|
|
AUDIT_ARG_VNODE1(dp);
|
2006-02-05 15:42:01 +00:00
|
|
|
else if (cnp->cn_flags & AUDITVNODE2)
|
2009-07-28 21:52:24 +00:00
|
|
|
AUDIT_ARG_VNODE2(dp);
|
2006-02-05 15:42:01 +00:00
|
|
1994-05-24 10:09:53 +00:00
|
|
|
if ((cnp->cn_flags & LOCKLEAF) == 0)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
2005-01-24 10:27:05 +00:00
|
|
|
success:
|
2007-09-21 10:16:56 +00:00
|
|
|
/*
|
2018-09-20 18:25:26 +00:00
|
|
|
* Because of shared lookup we may have the vnode shared locked, but
|
2007-09-21 10:16:56 +00:00
|
|
|
* the caller may want it to be exclusively locked.
|
|
|
|
*/
|
2009-03-11 14:13:47 +00:00
|
|
|
if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
|
|
|
|
VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(dp, LK_UPGRADE | LK_RETRY);
|
2019-12-08 21:30:04 +00:00
|
|
|
if (VN_IS_DOOMED(dp)) {
|
2008-12-18 11:58:12 +00:00
|
|
|
error = ENOENT;
|
|
|
|
goto bad2;
|
|
|
|
}
|
2007-09-21 10:16:56 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
return (0);
|
|
|
|
|
|
|
|
/*
 * Error exit used once ni_dvp is set: release the parent according to
 * ni_dvp_unlocked (vput if it is still locked and distinct from dp,
 * vrele otherwise, nothing if already released), then fall through.
 */
bad2:
|
2012-01-01 18:45:59 +00:00
|
|
|
if (ni_dvp_unlocked != 2) {
|
|
|
|
if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * Common error exit: vput dp unless dpunlocked says it was already
 * released, clear the result vnode and return the error.
 */
bad:
|
2005-04-09 11:53:16 +00:00
|
|
|
if (!dpunlocked)
|
2000-09-13 08:57:56 +00:00
|
|
|
vput(dp);
|
1994-05-24 10:09:53 +00:00
|
|
|
ndp->ni_vp = NULL;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
/*
|
|
|
|
* relookup - lookup a path name component
|
2007-05-27 20:50:23 +00:00
|
|
|
* Used by lookup to re-acquire things.
|
1994-09-27 20:33:41 +00:00
|
|
|
*/
|
|
|
|
int
|
2006-08-05 21:40:59 +00:00
|
|
|
relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
|
1994-09-27 20:33:41 +00:00
|
|
|
{
|
2016-04-15 16:10:11 +00:00
|
|
|
struct vnode *dp = NULL; /* the directory we are searching */
|
1994-09-27 20:33:41 +00:00
|
|
|
int wantparent; /* 1 => wantparent or lockparent flag */
|
|
|
|
int rdonly; /* lookup read-only flag bit */
|
|
|
|
int error = 0;
|
|
|
|
|
2005-03-28 09:24:50 +00:00
|
|
|
KASSERT(cnp->cn_flags & ISLASTCN,
|
|
|
|
("relookup: Not given last component."));
|
1994-09-27 20:33:41 +00:00
|
|
|
/*
|
|
|
|
* Setup: break out flag bits into variables.
|
|
|
|
*/
|
|
|
|
wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
|
2005-04-13 10:57:13 +00:00
|
|
|
KASSERT(wantparent, ("relookup: parent not wanted."));
|
1994-09-27 20:33:41 +00:00
|
|
|
rdonly = cnp->cn_flags & RDONLY;
|
|
|
|
cnp->cn_flags &= ~ISSYMLINK;
|
|
|
|
dp = dvp;
|
2005-03-29 10:07:15 +00:00
|
|
|
cnp->cn_lkflags = LK_EXCLUSIVE;
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
|
1994-09-27 20:33:41 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Search a new directory.
|
|
|
|
*
|
|
|
|
* The last component of the filename is left accessible via
|
|
|
|
* cnp->cn_nameptr for callers that need the name. Callers needing
|
|
|
|
* the name set the SAVENAME flag. When done, they assume
|
|
|
|
* responsibility for freeing the pathname buffer.
|
|
|
|
*/
|
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
|
|
|
printf("{%s}: ", cnp->cn_nameptr);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
2010-03-26 11:33:12 +00:00
|
|
|
* Check for "" which represents the root directory after slash
|
|
|
|
* removal.
|
1994-09-27 20:33:41 +00:00
|
|
|
*/
|
|
|
|
if (cnp->cn_nameptr[0] == '\0') {
|
2010-03-26 11:33:12 +00:00
|
|
|
/*
|
|
|
|
* Support only LOOKUP for "/" because lookup()
|
|
|
|
* can't succeed for CREATE, DELETE and RENAME.
|
|
|
|
*/
|
|
|
|
KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
|
|
|
|
KASSERT(dp->v_type == VDIR, ("dp is not a directory"));
|
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
if (!(cnp->cn_flags & LOCKLEAF))
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
1994-09-27 20:33:41 +00:00
|
|
|
*vpp = dp;
|
1999-12-15 23:02:35 +00:00
|
|
|
/* XXX This should probably move to the top of function. */
|
1994-09-27 20:33:41 +00:00
|
|
|
if (cnp->cn_flags & SAVESTART)
|
|
|
|
panic("lookup: SAVESTART");
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cnp->cn_flags & ISDOTDOT)
|
|
|
|
panic ("relookup: lookup on dot-dot");
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
/*
|
|
|
|
* We now have a segment name to search for, and a directory to search.
|
|
|
|
*/
|
2004-12-03 12:15:39 +00:00
|
|
|
#ifdef NAMEI_DIAGNOSTIC
|
2016-08-10 16:12:31 +00:00
|
|
|
vn_printf(dp, "search in ");
|
2004-12-03 12:15:39 +00:00
|
|
|
#endif
|
1999-01-28 00:57:57 +00:00
|
|
|
if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
|
1999-01-08 17:31:30 +00:00
|
|
|
KASSERT(*vpp == NULL, ("leaf should be empty"));
|
1994-09-27 20:33:41 +00:00
|
|
|
if (error != EJUSTRETURN)
|
|
|
|
goto bad;
|
|
|
|
/*
|
|
|
|
* If creating and at end of pathname, then can consider
|
|
|
|
* allowing file to be created.
|
|
|
|
*/
|
1995-10-22 09:32:48 +00:00
|
|
|
if (rdonly) {
|
1994-09-27 20:33:41 +00:00
|
|
|
error = EROFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
/* ASSERT(dvp == ndp->ni_startdir) */
|
|
|
|
if (cnp->cn_flags & SAVESTART)
|
|
|
|
VREF(dvp);
|
2005-03-28 09:24:50 +00:00
|
|
|
if ((cnp->cn_flags & LOCKPARENT) == 0)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
1994-09-27 20:33:41 +00:00
|
|
|
/*
|
|
|
|
* We return with ni_vp NULL to indicate that the entry
|
|
|
|
* doesn't currently exist, leaving a pointer to the
|
2006-08-05 21:08:47 +00:00
|
|
|
* (possibly locked) directory vnode in ndp->ni_dvp.
|
1994-09-27 20:33:41 +00:00
|
|
|
*/
|
|
|
|
return (0);
|
|
|
|
}
|
2006-09-13 18:39:09 +00:00
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
dp = *vpp;
|
|
|
|
|
|
|
|
/*
|
2002-05-16 21:28:32 +00:00
|
|
|
* Disallow directory write attempts on read-only filesystems.
|
1994-09-27 20:33:41 +00:00
|
|
|
*/
|
1995-10-22 09:32:48 +00:00
|
|
|
if (rdonly &&
|
|
|
|
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
|
2005-04-13 10:57:13 +00:00
|
|
|
if (dvp == dp)
|
|
|
|
vrele(dvp);
|
|
|
|
else
|
|
|
|
vput(dvp);
|
1995-10-22 09:32:48 +00:00
|
|
|
error = EROFS;
|
2005-04-13 10:57:13 +00:00
|
|
|
goto bad;
|
1994-09-27 20:33:41 +00:00
|
|
|
}
|
2005-04-13 10:57:13 +00:00
|
|
|
/*
|
|
|
|
* Set the parent lock/ref state to the requested state.
|
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
|
|
|
|
if (wantparent)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dvp);
|
2005-04-13 10:57:13 +00:00
|
|
|
else
|
|
|
|
vput(dvp);
|
|
|
|
} else if (!wantparent)
|
|
|
|
vrele(dvp);
|
|
|
|
/*
|
|
|
|
* Check for symbolic link
|
|
|
|
*/
|
|
|
|
KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
|
|
|
|
("relookup: symlink found.\n"));
|
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
/* ASSERT(dvp == ndp->ni_startdir) */
|
|
|
|
if (cnp->cn_flags & SAVESTART)
|
|
|
|
VREF(dvp);
|
1997-02-10 02:22:35 +00:00
|
|
|
|
1994-09-27 20:33:41 +00:00
|
|
|
if ((cnp->cn_flags & LOCKLEAF) == 0)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(dp);
|
1994-09-27 20:33:41 +00:00
|
|
|
return (0);
|
|
|
|
bad:
|
|
|
|
vput(dp);
|
|
|
|
*vpp = NULL;
|
|
|
|
return (error);
|
|
|
|
}
|
2005-02-07 18:44:55 +00:00
|
|
|
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
void
|
|
|
|
NDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg,
|
|
|
|
const char *namep, int dirfd, struct vnode *startdir, cap_rights_t *rightsp,
|
|
|
|
struct thread *td)
|
|
|
|
{
|
|
|
|
|
|
|
|
ndp->ni_cnd.cn_nameiop = op;
|
|
|
|
ndp->ni_cnd.cn_flags = flags;
|
|
|
|
ndp->ni_segflg = segflg;
|
|
|
|
ndp->ni_dirp = namep;
|
|
|
|
ndp->ni_dirfd = dirfd;
|
|
|
|
ndp->ni_startdir = startdir;
|
2019-02-08 04:18:17 +00:00
|
|
|
ndp->ni_resflags = 0;
|
2018-12-14 03:55:08 +00:00
|
|
|
filecaps_init(&ndp->ni_filecaps);
|
|
|
|
ndp->ni_cnd.cn_thread = td;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
if (rightsp != NULL)
|
|
|
|
ndp->ni_rightsneeded = *rightsp;
|
|
|
|
else
|
2020-02-15 01:28:42 +00:00
|
|
|
cap_rights_init_zero(&ndp->ni_rightsneeded);
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
}
|
|
|
|
|
2005-04-05 08:58:49 +00:00
|
|
|
/*
|
|
|
|
* Free data allocated by namei(); see namei(9) for details.
|
|
|
|
*/
|
|
|
|
void
|
2006-08-05 21:40:59 +00:00
|
|
|
NDFREE(struct nameidata *ndp, const u_int flags)
|
2005-04-05 08:58:49 +00:00
|
|
|
{
|
2005-04-09 11:53:16 +00:00
|
|
|
int unlock_dvp;
|
|
|
|
int unlock_vp;
|
|
|
|
|
|
|
|
unlock_dvp = 0;
|
|
|
|
unlock_vp = 0;
|
2005-04-05 08:58:49 +00:00
|
|
|
|
|
|
|
if (!(flags & NDF_NO_FREE_PNBUF) &&
|
|
|
|
(ndp->ni_cnd.cn_flags & HASBUF)) {
|
|
|
|
uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
|
|
|
|
ndp->ni_cnd.cn_flags &= ~HASBUF;
|
|
|
|
}
|
2005-04-09 11:53:16 +00:00
|
|
|
if (!(flags & NDF_NO_VP_UNLOCK) &&
|
|
|
|
(ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
|
|
|
|
unlock_vp = 1;
|
2019-05-21 15:12:13 +00:00
|
|
|
if (!(flags & NDF_NO_DVP_UNLOCK) &&
|
|
|
|
(ndp->ni_cnd.cn_flags & LOCKPARENT) &&
|
|
|
|
ndp->ni_dvp != ndp->ni_vp)
|
|
|
|
unlock_dvp = 1;
|
2005-04-09 11:53:16 +00:00
|
|
|
if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
|
|
|
|
if (unlock_vp) {
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
unlock_vp = 0;
|
|
|
|
} else
|
|
|
|
vrele(ndp->ni_vp);
|
|
|
|
ndp->ni_vp = NULL;
|
|
|
|
}
|
|
|
|
if (unlock_vp)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(ndp->ni_vp);
|
2005-04-05 08:58:49 +00:00
|
|
|
if (!(flags & NDF_NO_DVP_RELE) &&
|
|
|
|
(ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
|
2005-04-09 11:53:16 +00:00
|
|
|
if (unlock_dvp) {
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
unlock_dvp = 0;
|
|
|
|
} else
|
|
|
|
vrele(ndp->ni_dvp);
|
2005-04-05 08:58:49 +00:00
|
|
|
ndp->ni_dvp = NULL;
|
|
|
|
}
|
2005-04-09 11:53:16 +00:00
|
|
|
if (unlock_dvp)
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(ndp->ni_dvp);
|
2005-04-05 08:58:49 +00:00
|
|
|
if (!(flags & NDF_NO_STARTDIR_RELE) &&
|
|
|
|
(ndp->ni_cnd.cn_flags & SAVESTART)) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
ndp->ni_startdir = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-02-07 18:44:55 +00:00
|
|
|
/*
|
|
|
|
* Determine if there is a suitable alternate filename under the specified
|
|
|
|
* prefix for the specified path. If the create flag is set, then the
|
|
|
|
* alternate prefix will be used so long as the parent directory exists.
|
2016-04-29 22:15:33 +00:00
|
|
|
* This is used by the various compatibility ABIs so that Linux binaries prefer
|
2005-02-07 18:44:55 +00:00
|
|
|
* files under /compat/linux for example. The chosen path (whether under
|
|
|
|
* the prefix or under /) is returned in a kernel malloc'd buffer pointed
|
|
|
|
* to by pathbuf. The caller is responsible for free'ing the buffer from
|
|
|
|
* the M_TEMP bucket if one is returned.
|
|
|
|
*/
|
|
|
|
int
|
Implement the linux syscalls
openat, mkdirat, mknodat, fchownat, futimesat, fstatat, unlinkat,
renameat, linkat, symlinkat, readlinkat, fchmodat, faccessat.
Submitted by: rdivacky
Sponsored by: Google Summer of Code 2007
Tested by: pho
2008-04-08 09:45:49 +00:00
|
|
|
kern_alternate_path(struct thread *td, const char *prefix, const char *path,
|
|
|
|
enum uio_seg pathseg, char **pathbuf, int create, int dirfd)
|
2005-02-07 18:44:55 +00:00
|
|
|
{
|
|
|
|
struct nameidata nd, ndroot;
|
|
|
|
char *ptr, *buf, *cp;
|
|
|
|
size_t len, sz;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
|
|
|
|
*pathbuf = buf;
|
|
|
|
|
|
|
|
/* Copy the prefix into the new pathname as a starting point. */
|
|
|
|
len = strlcpy(buf, prefix, MAXPATHLEN);
|
|
|
|
if (len >= MAXPATHLEN) {
|
|
|
|
*pathbuf = NULL;
|
|
|
|
free(buf, M_TEMP);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
sz = MAXPATHLEN - len;
|
|
|
|
ptr = buf + len;
|
|
|
|
|
|
|
|
/* Append the filename to the prefix. */
|
|
|
|
if (pathseg == UIO_SYSSPACE)
|
|
|
|
error = copystr(path, ptr, sz, &len);
|
|
|
|
else
|
|
|
|
error = copyinstr(path, ptr, sz, &len);
|
|
|
|
|
|
|
|
if (error) {
|
|
|
|
*pathbuf = NULL;
|
|
|
|
free(buf, M_TEMP);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Only use a prefix with absolute pathnames. */
|
|
|
|
if (*ptr != '/') {
|
|
|
|
error = EINVAL;
|
|
|
|
goto keeporig;
|
|
|
|
}
|
|
|
|
|
Implement the linux syscalls
openat, mkdirat, mknodat, fchownat, futimesat, fstatat, unlinkat,
renameat, linkat, symlinkat, readlinkat, fchmodat, faccessat.
Submitted by: rdivacky
Sponsored by: Google Summer of Code 2007
Tested by: pho
2008-04-08 09:45:49 +00:00
|
|
|
if (dirfd != AT_FDCWD) {
|
|
|
|
/*
|
|
|
|
* We want the original because the "prefix" is
|
|
|
|
* included in the already opened dirfd.
|
|
|
|
*/
|
|
|
|
bcopy(ptr, buf, len);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2005-02-07 18:44:55 +00:00
|
|
|
/*
|
|
|
|
* We know that there is a / somewhere in this pathname.
|
|
|
|
* Search backwards for it, to find the file's parent dir
|
|
|
|
* to see if it exists in the alternate tree. If it does,
|
|
|
|
* and we want to create a file (cflag is set). We don't
|
|
|
|
* need to worry about the root comparison in this case.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (create) {
|
|
|
|
for (cp = &ptr[len] - 1; *cp != '/'; cp--);
|
|
|
|
*cp = '\0';
|
|
|
|
|
2017-10-15 18:53:21 +00:00
|
|
|
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td);
|
2005-02-07 18:44:55 +00:00
|
|
|
error = namei(&nd);
|
|
|
|
*cp = '/';
|
|
|
|
if (error != 0)
|
2005-09-21 19:49:42 +00:00
|
|
|
goto keeporig;
|
2005-02-07 18:44:55 +00:00
|
|
|
} else {
|
2017-10-15 18:53:21 +00:00
|
|
|
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td);
|
2005-02-07 18:44:55 +00:00
|
|
|
|
|
|
|
error = namei(&nd);
|
|
|
|
if (error != 0)
|
2005-09-21 19:49:42 +00:00
|
|
|
goto keeporig;
|
2005-02-07 18:44:55 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We now compare the vnode of the prefix to the one
|
|
|
|
* vnode asked. If they resolve to be the same, then we
|
|
|
|
* ignore the match so that the real root gets used.
|
|
|
|
* This avoids the problem of traversing "../.." to find the
|
|
|
|
* root directory and never finding it, because "/" resolves
|
|
|
|
* to the emulation root directory. This is expensive :-(
|
|
|
|
*/
|
2012-10-22 17:50:54 +00:00
|
|
|
NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix,
|
2005-09-21 19:49:42 +00:00
|
|
|
td);
|
2005-02-07 18:44:55 +00:00
|
|
|
|
|
|
|
/* We shouldn't ever get an error from this namei(). */
|
|
|
|
error = namei(&ndroot);
|
|
|
|
if (error == 0) {
|
|
|
|
if (nd.ni_vp == ndroot.ni_vp)
|
|
|
|
error = ENOENT;
|
|
|
|
|
|
|
|
NDFREE(&ndroot, NDF_ONLY_PNBUF);
|
|
|
|
vrele(ndroot.ni_vp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
NDFREE(&nd, NDF_ONLY_PNBUF);
|
|
|
|
vrele(nd.ni_vp);
|
|
|
|
|
|
|
|
keeporig:
|
|
|
|
/* If there was an error, use the original path name. */
|
|
|
|
if (error)
|
|
|
|
bcopy(ptr, buf, len);
|
|
|
|
return (error);
|
|
|
|
}
|