MFV illumos r266986:

2915 DTrace in a zone should see "cpu", "curpsinfo", et al
2916 DTrace in a zone should be able to access fds[]
2917 DTrace in a zone should have limited provider access

MFC after:	2 weeks
This commit is contained in:
rpaulo 2014-06-26 19:38:16 +00:00
commit 3191dbe25d
11 changed files with 845 additions and 46 deletions

View File

@ -97,6 +97,7 @@ INTFUNC(ntohll(0x1234567890abcdefL))
STRFUNC(inet_ntoa((ipaddr_t *)alloca(sizeof (ipaddr_t))))
STRFUNC(inet_ntoa6((in6_addr_t *)alloca(sizeof (in6_addr_t))))
STRFUNC(inet_ntop(AF_INET, (void *)alloca(sizeof (ipaddr_t))))
INTFUNC(getf(0))
BEGIN
/subr == DIF_SUBR_MAX + 1/

View File

@ -0,0 +1,91 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
#
# Scratch files, all keyed on our pid: the D program itself plus the output
# captured from each of the two dtrace runs.
#
scratch=/tmp/tst.fds.$$
tmpin=${scratch}.d
tmpout1=${scratch}.out1
tmpout2=${scratch}.out2
cat > $tmpin <<EOF
#define DUMPFIELD(fd, fmt, field) \
errmsg = "could not dump field"; \
printf("%d: field =fmt\n", fd, fds[fd].field);
/*
* Note that we are explicitly not looking at fi_mount -- it (by design) does
* not work if not running with kernel permissions.
*/
#define DUMP(fd) \
DUMPFIELD(fd, %s, fi_name); \
DUMPFIELD(fd, %s, fi_dirname); \
DUMPFIELD(fd, %s, fi_pathname); \
DUMPFIELD(fd, %d, fi_offset); \
DUMPFIELD(fd, %s, fi_fs); \
DUMPFIELD(fd, %o, fi_oflags);
BEGIN
{
DUMP(0);
DUMP(1);
DUMP(2);
DUMP(3);
DUMP(4);
exit(0);
}
ERROR
{
printf("error: %s\n", errmsg);
exit(1);
}
EOF
#
# Baseline: run the program with whatever privileges we were started with,
# and park the result in the first output file.
#
/usr/sbin/dtrace -q -Cs /dev/stdin < "$tmpin" > "$tmpout2"
mv "$tmpout2" "$tmpout1"
#
# Now shed everything but basic, dtrace_proc and dtrace_user and run the very
# same program again -- fds[] must report exactly what it did before.
#
ppriv -s A=basic,dtrace_proc,dtrace_user $$
/usr/sbin/dtrace -q -Cs /dev/stdin < "$tmpin" > "$tmpout2"
#
# Dump both captures (for the benefit of whoever reads the test log), then
# let cmp decide the verdict.
#
for out in "$tmpout1" "$tmpout2"; do
	echo ">>> $out"
	cat "$out"
done
if cmp "$tmpout1" "$tmpout2" ; then
	rval=0
else
	rval=1
fi
rm "$tmpout1" "$tmpout2" "$tmpin"
exit $rval

View File

@ -0,0 +1,98 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
#
# Restrict this shell (and therefore the dtrace we are about to run) to the
# basic, dtrace_proc and dtrace_user privileges.
#
ppriv -s A=basic,dtrace_proc,dtrace_user $$
#
# The D program below exercises getf() under those reduced privileges:
#
#   CANREAD(field)   expands to a BEGIN clause that traces the named member
#                    of the file_t returned by getf(0); errmsg is set first,
#                    so a fault in the clause is reported by the ERROR clause
#                    as a fatal error.
#
#   CANTREAD(field)  expands to a BEGIN clause that clears errmsg and then
#                    traces the member; if the trace does not fault, the
#                    clause goes on to print a message and exit(1).
#
# The trailing hand-written BEGIN clauses check that a second getf() in the
# same clause, and a this->fp carried over from a prior clause, do not leave
# the earlier file_t readable.  The ERROR clause only acts when errmsg is
# non-empty, i.e. when a read that was expected to succeed faulted.
#
/usr/sbin/dtrace -q -Cs /dev/stdin <<EOF
#define CANREAD(field) \
BEGIN { this->fp = getf(0); errmsg = "can't read field"; \
printf("field: "); trace(this->fp->field); printf("\n"); }
#define CANTREAD(field) \
BEGIN { errmsg = ""; this->fp = getf(0); trace(this->fp->field); \
printf("\nable to successfully read field!"); exit(1); }
CANREAD(f_flag)
CANREAD(f_flag2)
CANREAD(f_vnode)
CANREAD(f_offset)
CANREAD(f_cred)
CANREAD(f_audit_data)
CANREAD(f_count)
/*
* We can potentially read parts of our cred, but we can't dereference
* through cr_zone.
*/
CANTREAD(f_cred->cr_zone->zone_id)
CANREAD(f_vnode->v_path)
CANREAD(f_vnode->v_op)
CANREAD(f_vnode->v_op->vnop_name)
CANTREAD(f_vnode->v_flag)
CANTREAD(f_vnode->v_count)
CANTREAD(f_vnode->v_pages)
CANTREAD(f_vnode->v_type)
CANTREAD(f_vnode->v_vfsmountedhere)
CANTREAD(f_vnode->v_op->vop_open)
BEGIN
{
errmsg = "";
this->fp = getf(0);
this->fp2 = getf(1);
trace(this->fp->f_vnode);
printf("\nable to successfully read this->fp!");
exit(1);
}
BEGIN
{
errmsg = "";
this->fp = getf(0);
}
BEGIN
{
trace(this->fp->f_vnode);
printf("\nable to successfully read this->fp from prior clause!");
}
BEGIN
{
exit(0);
}
ERROR
/errmsg != ""/
{
printf("fatal error: %s", errmsg);
exit(1);
}
EOF

View File

@ -0,0 +1,138 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
#
# Restrict this shell (and therefore the dtrace we are about to run) to the
# basic, dtrace_proc and dtrace_user privileges.
#
ppriv -s A=basic,dtrace_proc,dtrace_user $$
#
# When we have dtrace_proc (but lack dtrace_kernel), we expect to be able to
# read certain curpsinfo/curlwpsinfo/curcpu fields even though they require
# reading in-kernel state. However, there are other fields in these translated
# structures that we know we shouldn't be able to read, as they require reading
# in-kernel state that we cannot read with only dtrace_proc. Finally, there
# are a few fields that we may or may not be able to read depending on the
# specifics of context. This test therefore asserts that we can read what we
# think we should be able to, that we can't read what we think we shouldn't be
# able to, and (for purposes of completeness) that we are indifferent about
# what we cannot assert one way or the other.
#
# Macro conventions used by the D program:
#
#   CANREAD/CANREADVAR      set errmsg before the access, so a fault is
#                           reported by the ERROR clause as a fatal error.
#   CANTREAD/CANTREADVAR    clear errmsg before the access; if the access
#                           does not fault the clause prints a message and
#                           calls exit(1).
#   MIGHTREAD/MIGHTREADVAR  clear errmsg and never call exit(1), so either
#                           outcome is accepted.
#
# The here-document must be closed by the EOF line at the very end of this
# file; do not add anything after it.
#
/usr/sbin/dtrace -q -Cs /dev/stdin <<EOF
#define CANREAD(what, field) \
BEGIN { errmsg = "can't read field from what"; printf("field: "); \
trace(what->field); printf("\n"); }
#define CANTREAD(what, field) \
BEGIN { errmsg = ""; trace(what->field); \
printf("\nable to successfully read field from what!"); exit(1); }
#define MIGHTREAD(what, field) \
BEGIN { errmsg = ""; printf("field: "); trace(what->field); printf("\n"); }
#define CANREADVAR(vname) \
BEGIN { errmsg = "can't read vname"; printf("vname: "); \
trace(vname); printf("\n"); }
#define CANTREADVAR(vname) \
BEGIN { errmsg = ""; trace(vname); \
printf("\nable to successfully read vname!"); exit(1); }
#define MIGHTREADVAR(vname) \
BEGIN { errmsg = ""; printf("vname: "); trace(vname); printf("\n"); }
CANREAD(curpsinfo, pr_pid)
CANREAD(curpsinfo, pr_nlwp)
CANREAD(curpsinfo, pr_ppid)
CANREAD(curpsinfo, pr_uid)
CANREAD(curpsinfo, pr_euid)
CANREAD(curpsinfo, pr_gid)
CANREAD(curpsinfo, pr_egid)
CANREAD(curpsinfo, pr_addr)
CANREAD(curpsinfo, pr_start)
CANREAD(curpsinfo, pr_fname)
CANREAD(curpsinfo, pr_psargs)
CANREAD(curpsinfo, pr_argc)
CANREAD(curpsinfo, pr_argv)
CANREAD(curpsinfo, pr_envp)
CANREAD(curpsinfo, pr_dmodel)
/*
* If our p_pgidp points to the same pid structure as our p_pidp, we will
* be able to read pr_pgid -- but we won't if not.
*/
MIGHTREAD(curpsinfo, pr_pgid)
CANTREAD(curpsinfo, pr_sid)
CANTREAD(curpsinfo, pr_ttydev)
CANTREAD(curpsinfo, pr_projid)
CANTREAD(curpsinfo, pr_zoneid)
CANTREAD(curpsinfo, pr_contract)
CANREAD(curlwpsinfo, pr_flag)
CANREAD(curlwpsinfo, pr_lwpid)
CANREAD(curlwpsinfo, pr_addr)
CANREAD(curlwpsinfo, pr_wchan)
CANREAD(curlwpsinfo, pr_stype)
CANREAD(curlwpsinfo, pr_state)
CANREAD(curlwpsinfo, pr_sname)
CANREAD(curlwpsinfo, pr_syscall)
CANREAD(curlwpsinfo, pr_pri)
CANREAD(curlwpsinfo, pr_onpro)
CANREAD(curlwpsinfo, pr_bindpro)
CANREAD(curlwpsinfo, pr_bindpset)
CANTREAD(curlwpsinfo, pr_clname)
CANTREAD(curlwpsinfo, pr_lgrp)
CANREAD(curcpu, cpu_id)
CANTREAD(curcpu, cpu_pset)
CANTREAD(curcpu, cpu_chip)
CANTREAD(curcpu, cpu_lgrp)
CANTREAD(curcpu, cpu_info)
/*
* We cannot assert one thing or another about the variable "root": for those
* with only dtrace_proc, it will be readable in the global but not readable in
* the non-global.
*/
MIGHTREADVAR(root)
CANREADVAR(cpu)
CANTREADVAR(pset)
CANTREADVAR(cwd)
CANTREADVAR(chip)
CANTREADVAR(lgrp)
BEGIN
{
exit(0);
}
ERROR
/errmsg != ""/
{
printf("fatal error: %s", errmsg);
exit(1);
}
EOF

View File

@ -0,0 +1,126 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
#
# First, make sure that we can successfully enable the io provider
#
dtrace -P io -n BEGIN'{exit(0)}' > /dev/null 2>&1 || {
	echo failed to enable io provider with full privs
	exit 1
}
#
# Drop down to basic, dtrace_proc and dtrace_user for everything that follows.
#
ppriv -s A=basic,dtrace_proc,dtrace_user $$
#
# With reduced privs the io provider must not be enablable: dtrace is expected
# to complain with the D_PDESC_ZERO error tag, and anything else is a failure.
#
dtrace -x errtags -P io -n BEGIN'{exit(1)}' 2>&1 | \
    grep D_PDESC_ZERO > /dev/null 2>&1 || {
	echo successfully enabled the io provider with reduced privs
	exit 1
}
#
# Still with reduced privs: every provider named in a CANENABLE() below must
# be visible and must fire for our progeny, curpsinfo must be readable from
# its probes, and probe arguments must not be.
#
/usr/sbin/dtrace -wq -Cs /dev/stdin <<EOF
int seen[string];
int err;
#define CANENABLE(provider) \
provider::: \
/err == 0 && progenyof(\$pid) && !seen["provider"]/ \
{ \
trace(arg0); \
printf("\nsuccessful trace of arg0 in %s:%s:%s:%s\n", \
probeprov, probemod, probefunc, probename); \
exit(++err); \
} \
\
provider::: \
/progenyof(\$pid)/ \
{ \
seen["provider"]++; \
} \
\
provider::: \
/progenyof(\$pid)/ \
{ \
errstr = "provider"; \
this->ignore = stringof(curpsinfo->pr_psargs); \
errstr = ""; \
} \
\
END \
/err == 0 && !seen["provider"]/ \
{ \
printf("no probes from provider\n"); \
exit(++err); \
} \
\
END \
/err == 0/ \
{ \
printf("saw %d probes from provider\n", seen["provider"]); \
}
CANENABLE(proc)
CANENABLE(sched)
CANENABLE(vminfo)
CANENABLE(sysinfo)
BEGIN
{
/*
* We'll kick off a system of a do-nothing command -- which should be
* enough to kick proc, sched, vminfo and sysinfo probes.
*/
system("echo > /dev/null");
}
ERROR
/err == 0 && errstr != ""/
{
printf("fatal error: couldn't read curpsinfo->pr_psargs in ");
printf("%s-provided probe\n", errstr);
exit(++err);
}
proc:::exit
/progenyof(\$pid)/
{
exit(0);
}
tick-10ms
/i++ > 500/
{
printf("exit probe did not seem to fire\n");
exit(++err);
}
EOF

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@ -122,8 +122,9 @@
#define DT_VERS_1_8_1 DT_VERSION_NUMBER(1, 8, 1)
#define DT_VERS_1_9 DT_VERSION_NUMBER(1, 9, 0)
#define DT_VERS_1_9_1 DT_VERSION_NUMBER(1, 9, 1)
#define DT_VERS_LATEST DT_VERS_1_9_1
#define DT_VERS_STRING "Sun D 1.9.1"
#define DT_VERS_1_10 DT_VERSION_NUMBER(1, 10, 0)
#define DT_VERS_LATEST DT_VERS_1_10
#define DT_VERS_STRING "Sun D 1.10"
const dt_version_t _dtrace_versions[] = {
DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
@ -145,6 +146,7 @@ const dt_version_t _dtrace_versions[] = {
DT_VERS_1_8_1, /* D API 1.8.1 */
DT_VERS_1_9, /* D API 1.9 */
DT_VERS_1_9_1, /* D API 1.9.1 */
DT_VERS_1_10, /* D API 1.10 */
0
};
@ -275,6 +277,8 @@ static const dt_ident_t _dtrace_globals[] = {
&dt_idops_func, "uint64_t(uint64_t)" },
{ "htons", DT_IDENT_FUNC, 0, DIF_SUBR_HTONS, DT_ATTR_EVOLCMN, DT_VERS_1_3,
&dt_idops_func, "uint16_t(uint16_t)" },
{ "getf", DT_IDENT_FUNC, 0, DIF_SUBR_GETF, DT_ATTR_STABCMN, DT_VERS_1_10,
&dt_idops_func, "file_t *(int)" },
{ "gid", DT_IDENT_SCALAR, 0, DIF_VAR_GID, DT_ATTR_STABCMN, DT_VERS_1_0,
&dt_idops_type, "gid_t" },
{ "id", DT_IDENT_SCALAR, 0, DIF_VAR_ID, DT_ATTR_STABCMN, DT_VERS_1_0,

View File

@ -219,6 +219,7 @@ static dtrace_provider_t *dtrace_provider; /* provider list */
static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
static int dtrace_opens; /* number of opens */
static int dtrace_helpers; /* number of helpers */
static int dtrace_getf; /* number of unpriv getf()s */
#if defined(sun)
static void *dtrace_softstate; /* softstate pointer */
#endif
@ -468,8 +469,8 @@ static kmutex_t dtrace_errlock;
* disallow all negative sizes. Ranges of size 0 are allowed.
*/
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
((testaddr) - (baseaddr) < (basesz) && \
(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
(testaddr) + (testsz) >= (testaddr))
/*
@ -578,6 +579,8 @@ void dtrace_dynvar_clean(dtrace_dstate_t *);
dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *,
size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *);
uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_priv_proc(dtrace_state_t *);
static void dtrace_getf_barrier(void);
/*
* DTrace Probe Context Functions
@ -722,7 +725,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
* up both thread-local variables and any global dynamically-allocated
* variables.
*/
if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
vstate->dtvs_dynvars.dtds_size)) {
dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
uintptr_t base = (uintptr_t)dstate->dtds_base +
@ -789,6 +792,7 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;
file_t *fp;
/*
* If we hold the privilege to read from kernel memory, then
@ -806,10 +810,104 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
/*
* We're allowed to read from our own string table.
*/
if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
mstate->dtms_difo->dtdo_strlen))
return (1);
if (vstate->dtvs_state != NULL &&
dtrace_priv_proc(vstate->dtvs_state)) {
proc_t *p;
/*
* When we have privileges to the current process, there are
* several context-related kernel structures that are safe to
* read, even absent the privilege to read from kernel memory.
* These reads are safe because these structures contain only
* state that (1) we're permitted to read, (2) is harmless or
* (3) contains pointers to additional kernel state that we're
* not permitted to read (and as such, do not present an
* opportunity for privilege escalation). Finally (and
* critically), because of the nature of their relation with
* the current thread context, the memory associated with these
* structures cannot change over the duration of probe context,
* and it is therefore impossible for this memory to be
* deallocated and reallocated as something else while it's
* being operated upon.
*/
if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
return (1);
if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
sz, curthread->t_procp, sizeof (proc_t))) {
return (1);
}
if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
curthread->t_cred, sizeof (cred_t))) {
return (1);
}
#if defined(sun)
if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
&(p->p_pidp->pid_id), sizeof (pid_t))) {
return (1);
}
if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
return (1);
}
#endif
}
if ((fp = mstate->dtms_getf) != NULL) {
uintptr_t psz = sizeof (void *);
vnode_t *vp;
vnodeops_t *op;
/*
* When getf() returns a file_t, the enabling is implicitly
* granted the (transient) right to read the returned file_t
* as well as the v_path and v_op->vnop_name of the underlying
* vnode. These accesses are allowed after a successful
* getf() because the members that they refer to cannot change
* once set -- and the barrier logic in the kernel's closef()
* path assures that the file_t and its referenced vnode_t
* cannot themselves be stale (that is, it is impossible for
* either dtms_getf itself or its f_vnode member to reference
* freed memory).
*/
if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
return (1);
if ((vp = fp->f_vnode) != NULL) {
#if defined(sun)
if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
return (1);
if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
vp->v_path, strlen(vp->v_path) + 1)) {
return (1);
}
#endif
if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
return (1);
#if defined(sun)
if ((op = vp->v_op) != NULL &&
DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
return (1);
}
if (op != NULL && op->vnop_name != NULL &&
DTRACE_INRANGE(addr, sz, op->vnop_name,
strlen(op->vnop_name) + 1)) {
return (1);
}
#endif
}
}
DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
*illval = addr;
return (0);
@ -1189,8 +1287,7 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state)
*/
ASSERT(s_cr != NULL);
if ((cr = CRED()) != NULL &&
s_cr->cr_zone == cr->cr_zone)
if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
return (1);
return (0);
@ -1289,6 +1386,115 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state)
return (0);
}
/*
* Determine if the dte_cond of the specified ECB allows for processing of
* the current probe to continue. Note that this routine may allow continued
* processing, but with access(es) stripped from the mstate's dtms_access
* field.
*
* Arguments:
*   state  - consumer state; its dts_cred.dcr_cred is compared against the
*            credentials of the current context.
*   mstate - machine state for this firing; dtms_access may have
*            DTRACE_ACCESS_ARGS and/or DTRACE_ACCESS_PROC cleared.
*   ecb    - the ECB being processed; dte_cond must be non-zero.
*
* Returns 1 if processing of the ECB should continue (possibly with reduced
* dtms_access), or 0 if this firing must be dropped for this enabling.
*
* Note that the provider's dtps_mode() entry point is only consulted when
* built with "sun" defined; otherwise mode stays at its initial value of
* DTRACE_MODE_NOPRIV_DROP (and pops is not otherwise referenced), so every
* failed check below results in the firing being dropped.
*/
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
dtrace_ecb_t *ecb)
{
dtrace_probe_t *probe = ecb->dte_probe;
dtrace_provider_t *prov = probe->dtpr_provider;
dtrace_pops_t *pops = &prov->dtpv_pops;
int mode = DTRACE_MODE_NOPRIV_DROP;
ASSERT(ecb->dte_cond);
#if defined(sun)
if (pops->dtps_mode != NULL) {
mode = pops->dtps_mode(prov->dtpv_arg,
probe->dtpr_id, probe->dtpr_arg);
/*
* A provider must report one of the two modes and one of the two
* no-privilege policies.
*/
ASSERT((mode & DTRACE_MODE_USER) ||
(mode & DTRACE_MODE_KERNEL));
ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
(mode & DTRACE_MODE_NOPRIV_DROP));
}
/*
* If the dte_cond bits indicate that this consumer is only allowed to
* see user-mode firings of this probe, call the provider's dtps_mode()
* entry point to check that the probe was fired while in a user
* context. If that's not the case, use the policy specified by the
* provider to determine if we drop the probe or merely restrict
* operation.
*/
if (ecb->dte_cond & DTRACE_COND_USERMODE) {
/*
* mode still holding its bare initial value here would mean that
* no dtps_mode() result was obtained above.
*/
ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
if (!(mode & DTRACE_MODE_USER)) {
if (mode & DTRACE_MODE_NOPRIV_DROP)
return (0);
mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
}
}
#endif
/*
* This is more subtle than it looks. We have to be absolutely certain
* that CRED() isn't going to change out from under us so it's only
* legit to examine that structure if we're in constrained situations.
* Currently, the only times we'll do this check is if a non-super-user
* has enabled the profile or syscall providers -- providers that
* allow visibility of all processes. For the profile case, the check
* above will ensure that we're examining a user context.
*/
if (ecb->dte_cond & DTRACE_COND_OWNER) {
cred_t *cr;
cred_t *s_cr = state->dts_cred.dcr_cred;
proc_t *proc;
ASSERT(s_cr != NULL);
/*
* The consumer's uid/gid must match the effective, real and saved
* ids of the current context, and the current process must exist
* and must not have SNOCD set.
*/
if ((cr = CRED()) == NULL ||
s_cr->cr_uid != cr->cr_uid ||
s_cr->cr_uid != cr->cr_ruid ||
s_cr->cr_uid != cr->cr_suid ||
s_cr->cr_gid != cr->cr_gid ||
s_cr->cr_gid != cr->cr_rgid ||
s_cr->cr_gid != cr->cr_sgid ||
(proc = ttoproc(curthread)) == NULL ||
(proc->p_flag & SNOCD)) {
if (mode & DTRACE_MODE_NOPRIV_DROP)
return (0);
#if defined(sun)
mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
#endif
}
}
#if defined(sun)
/*
* If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
* in our zone, check to see if our mode policy is to restrict rather
* than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
* and DTRACE_ACCESS_ARGS
*/
if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
cred_t *cr;
cred_t *s_cr = state->dts_cred.dcr_cred;
ASSERT(s_cr != NULL);
if ((cr = CRED()) == NULL ||
s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
if (mode & DTRACE_MODE_NOPRIV_DROP)
return (0);
mstate->dtms_access &=
~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
}
}
#endif
/* Every applicable condition was satisfied or merely restricted. */
return (1);
}
/*
* Note: not called from probe context. This function is called
* asynchronously (and at a regular interval) from outside of probe context to
@ -2907,7 +3113,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
#endif
case DIF_VAR_CURTHREAD:
if (!dtrace_priv_kernel(state))
if (!dtrace_priv_proc(state))
return (0);
return ((uint64_t)(uintptr_t)curthread);
@ -4577,11 +4783,32 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
/*
* getf(fd): yield the file_t backing descriptor fd of the current process,
* or 0 if the enabling lacks proc privilege.
*/
case DIF_SUBR_GETF: {
uintptr_t fd = tupregs[0].dttk_value;
struct filedesc *fdp;
file_t *fp;
if (!dtrace_priv_proc(state)) {
regs[rd] = 0;
break;
}
fdp = curproc->p_fd;
/*
* NOTE(review): the lookup is bracketed by the descriptor table's
* shared lock, and no reference is taken on the returned file_t
* before the lock is dropped -- confirm that both are acceptable
* in probe context on this platform.
*/
FILEDESC_SLOCK(fdp);
fp = fget_locked(fdp, fd);
/*
* Remember the result in the mstate: dtrace_canload() consults
* dtms_getf to grant read access to this file_t.
*/
mstate->dtms_getf = fp;
regs[rd] = (uintptr_t)fp;
FILEDESC_SUNLOCK(fdp);
break;
}
case DIF_SUBR_CLEANPATH: {
char *dest = (char *)mstate->dtms_scratch_ptr, c;
uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
uintptr_t src = tupregs[0].dttk_value;
int i = 0, j = 0;
#if defined(sun)
zone_t *z;
#endif
if (!dtrace_strcanload(src, size, mstate, vstate)) {
regs[rd] = 0;
@ -4680,6 +4907,25 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
} while (c != '\0');
dest[j] = '\0';
#if defined(sun)
if (mstate->dtms_getf != NULL &&
!(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
(z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
/*
* If we've done a getf() as a part of this ECB and we
* don't have kernel access (and we're not in the global
* zone), check if the path we cleaned up begins with
* the zone's root path, and trim it off if so. Note
* that this is an output cleanliness issue, not a
* security issue: knowing one's zone root path does
* not enable privilege escalation.
*/
if (strstr(dest, z->zone_rootpath) == dest)
dest += strlen(z->zone_rootpath) - 1;
}
#endif
regs[rd] = (uintptr_t)dest;
mstate->dtms_scratch_ptr += size;
break;
@ -5153,71 +5399,50 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
pc = DIF_INSTR_LABEL(instr);
break;
case DIF_OP_RLDSB:
if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDSB:
regs[rd] = (int8_t)dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDSH:
if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDSH:
regs[rd] = (int16_t)dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDSW:
if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDSW:
regs[rd] = (int32_t)dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDUB:
if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDUB:
regs[rd] = dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDUH:
if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDUH:
regs[rd] = dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDUW:
if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDUW:
regs[rd] = dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDX:
if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
*flags |= CPU_DTRACE_KPRIV;
*illval = regs[r1];
if (!dtrace_canload(regs[r1], 8, mstate, vstate))
break;
}
/*FALLTHROUGH*/
case DIF_OP_LDX:
regs[rd] = dtrace_load64(regs[r1]);
@ -6180,6 +6405,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
uint64_t val = 0;
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
mstate.dtms_getf = NULL;
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@ -8853,6 +9080,20 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
subr == DIF_SUBR_COPYOUTSTR) {
dp->dtdo_destructive = 1;
}
if (subr == DIF_SUBR_GETF) {
/*
* If we have a getf() we need to record that
* in our state. Note that our state can be
* NULL if this is a helper -- but in that
* case, the call to getf() is itself illegal,
* and will be caught (slightly later) when
* the helper is validated.
*/
if (vstate->dtvs_state != NULL)
vstate->dtvs_state->dts_getf++;
}
break;
case DIF_OP_PUSHTR:
if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
@ -13758,6 +13999,24 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_WARMUP;
#if defined(sun)
if (state->dts_getf != 0 &&
!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
/*
* We don't have kernel privs but we have at least one call
* to getf(); we need to bump our zone's count, and (if
* this is the first enabling to have an unprivileged call
* to getf()) we need to hook into closef().
*/
state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
if (dtrace_getf++ == 0) {
ASSERT(dtrace_closef == NULL);
dtrace_closef = dtrace_getf_barrier;
}
}
#endif
/*
* Now it's time to actually fire the BEGIN probe. We need to disable
* interrupts here both to record the CPU on which we fired the BEGIN
@ -13874,6 +14133,26 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_STOPPED;
dtrace_sync();
#if defined(sun)
if (state->dts_getf != 0 &&
!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
/*
* We don't have kernel privs but we have at least one call
* to getf(); we need to lower our zone's count, and (if
* this is the last enabling to have an unprivileged call
* to getf()) we need to clear the closef() hook.
*/
ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
ASSERT(dtrace_closef == dtrace_getf_barrier);
ASSERT(dtrace_getf > 0);
state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
if (--dtrace_getf == 0)
dtrace_closef = NULL;
}
#endif
return (0);
}
@ -15543,6 +15822,25 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
dtrace_toxranges++;
}
/*
 * Hook installed as dtrace_closef while at least one unprivileged enabling
 * containing a getf() call is active (see dtrace_state_go() and
 * dtrace_state_stop()).  Takes no arguments and returns nothing; when built
 * without "sun" defined the body is empty.
 */
static void
dtrace_getf_barrier(void)
{
#if defined(sun)
	/*
	 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
	 * that contain calls to getf(), this routine will be called on every
	 * closef() before either the underlying vnode is released or the
	 * file_t itself is freed.  By the time we are here, it is essential
	 * that the file_t can no longer be accessed from a call to getf()
	 * in probe context -- that assures that a dtrace_sync() can be used
	 * to clear out any enablings referring to the old structures.
	 */
	if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
	    kcred->cr_zone->zone_dtrace_getf != 0)
		dtrace_sync();
#endif
}
/*
* DTrace Driver Cookbook Functions
*/
@ -16744,6 +17042,9 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
dtrace_modload = NULL;
dtrace_modunload = NULL;
ASSERT(dtrace_getf == 0);
ASSERT(dtrace_closef == NULL);
mutex_exit(&cpu_lock);
if (dtrace_helptrace_enabled) {

View File

@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@ -85,11 +86,11 @@ static dtrace_pattr_t xpv_attr = {
sdt_provider_t sdt_providers[] = {
{ "vtrace", "__vtrace_", &vtrace_attr, 0 },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, 0 },
{ "vminfo", "__cpu_vminfo_", &info_attr, 0 },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, DTRACE_PRIV_USER },
{ "vminfo", "__cpu_vminfo_", &info_attr, DTRACE_PRIV_USER },
{ "fpuinfo", "__fpuinfo_", &fpu_attr, 0 },
{ "sched", "__sched_", &stab_attr, 0 },
{ "proc", "__proc_", &stab_attr, 0 },
{ "sched", "__sched_", &stab_attr, DTRACE_PRIV_USER },
{ "proc", "__proc_", &stab_attr, DTRACE_PRIV_USER },
{ "io", "__io_", &stab_attr, 0 },
{ "mib", "__mib_", &stab_attr, 0 },
{ "fsinfo", "__fsinfo_", &fsinfo_attr, 0 },
@ -851,6 +852,20 @@ sdt_argdesc_t sdt_args[] = {
{ NULL }
};
/*ARGSUSED*/
int
sdt_mode(void *arg, dtrace_id_t id, void *parg)
{
	/*
	 * The answer is the same for every SDT probe, so none of the
	 * arguments are examined.  Policy: drop the firing outright when the
	 * enabling lacks the necessary privilege, and restrict it when the
	 * enabling's privilege is limited (i.e., not all-zone).
	 */
	int policy = DTRACE_MODE_NOPRIV_DROP |
	    DTRACE_MODE_LIMITEDPRIV_RESTRICT;

	/* SDT probes always report themselves as firing in kernel mode. */
	return (DTRACE_MODE_KERNEL | policy);
}
/*ARGSUSED*/
void
sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)

View File

@ -25,7 +25,7 @@
*/
/*
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@ -312,8 +312,8 @@ typedef enum dtrace_probespec {
#define DIF_SUBR_SX_EXCLUSIVE_HELD 49
#define DIF_SUBR_SX_ISEXCLUSIVE 50
#define DIF_SUBR_MEMSTR 51
#define DIF_SUBR_MAX 51 /* max subroutine value */
#define DIF_SUBR_GETF 52
#define DIF_SUBR_MAX 52 /* max subroutine value */
typedef uint32_t dif_instr_t;
@ -1706,7 +1706,22 @@ typedef struct dof_helper {
*
* 1.10.3 Return value
*
* A boolean value.
* A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL
* or DTRACE_MODE_USER) and the policy when the privilege of the enabling
* is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP,
* DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If
* DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result
* in the probe firing being silently ignored for the enabling; if the
* DTRACE_MODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not
* prevent probe processing for the enabling, but restrictions will be in
* place that induce a UPRIV fault upon attempt to examine probe arguments
* or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit
* is set, similar restrictions will be placed upon operation if the
* privilege is sufficient to process the enabling, but does not otherwise
* entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and
* DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these
* two policies must be specified), but either may be combined (or not)
* with DTRACE_MODE_LIMITEDPRIV_RESTRICT.
*
* 1.10.4 Caller's context
*
@ -2101,6 +2116,12 @@ typedef struct dtrace_pops {
void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg);
} dtrace_pops_t;
/*
 * Bits making up the return value of a provider's dtps_mode() entry point.
 * The low nibble carries the mode of the firing; the remaining bits carry the
 * policy to apply when the enabling's privilege is insufficient or limited.
 * DTRACE_MODE_NOPRIV_DROP and DTRACE_MODE_NOPRIV_RESTRICT are mutually
 * exclusive and one of them must be specified; either may be combined with
 * DTRACE_MODE_LIMITEDPRIV_RESTRICT.
 */
/* mode: probe fired in kernel context */
#define DTRACE_MODE_KERNEL 0x01
/* mode: probe fired in user context */
#define DTRACE_MODE_USER 0x02
/* policy: silently ignore the firing when privilege is insufficient */
#define DTRACE_MODE_NOPRIV_DROP 0x10
/* policy: process the firing, but restrict args/process state access */
#define DTRACE_MODE_NOPRIV_RESTRICT 0x20
/* policy: restrict likewise when privilege does not extend to all zones */
#define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40
typedef uintptr_t dtrace_provider_id_t;
extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t,
@ -2321,6 +2342,7 @@ extern void (*dtrace_helpers_cleanup)(void);
extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child);
extern void (*dtrace_cpustart_init)(void);
extern void (*dtrace_cpustart_fini)(void);
extern void (*dtrace_closef)(void);
extern void (*dtrace_debugger_init)(void);
extern void (*dtrace_debugger_fini)(void);

View File

@ -934,6 +934,7 @@ typedef struct dtrace_mstate {
uintptr_t dtms_strtok; /* saved strtok() pointer */
uint32_t dtms_access; /* memory access rights */
dtrace_difo_t *dtms_difo; /* current dif object */
file_t *dtms_getf; /* cached rval of getf() */
} dtrace_mstate_t;
#define DTRACE_COND_OWNER 0x1
@ -1166,6 +1167,7 @@ struct dtrace_state {
dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */
dtrace_cred_t dts_cred; /* credentials */
size_t dts_nretained; /* number of retained enabs */
int dts_getf; /* number of getf() calls */
};
struct dtrace_provider {

View File

@ -136,6 +136,7 @@ typedef struct kdtrace_thread {
#define t_procp td_proc
#define t_tid td_tid
#define t_did td_tid
#define t_cred td_ucred
int priv_policy(const cred_t *, int, boolean_t, int, const char *);