From 19e252baebe7a7466b33c27560420b7d95fe294d Mon Sep 17 00:00:00 2001 From: Alexander Leidinger Date: Sat, 5 May 2012 19:42:38 +0000 Subject: [PATCH] - >500 static DTrace probes for the linuxulator - DTrace scripts to check for errors, performance, ... they serve mostly as examples of what you can do with the static probes; with moderate load the scripts may be overwhelmed, excessive lock-tracing may influence program behavior (see the last design decision) Design decisions: - use "linuxulator" as the provider for the native bitsize; add the bitsize for the non-native emulation (e.g. "linuxulator32" on amd64) - Add probes only for locks which are acquired in one function and released in another function. Locks which are acquired and released in the same function should be easy to pair in the code, inter-function locking is easier to verify in DTrace. - Probes for locks should be fired after locking and before releasing to prevent races (to provide data/function stability in DTrace, see the man-page of "dtrace -v ..." and the corresponding DTrace docs). 
--- sys/amd64/linux32/linux.h | 1 + sys/amd64/linux32/linux32_dummy.c | 9 + sys/compat/linux/check_error.d | 144 +++++++ sys/compat/linux/check_internal_locks.d | 132 ++++++ sys/compat/linux/linux_dtrace.h | 95 +++++ sys/compat/linux/linux_emul.c | 125 +++++- sys/compat/linux/linux_emul.h | 41 +- sys/compat/linux/linux_fork.c | 10 + sys/compat/linux/linux_futex.c | 509 ++++++++++++++++++++---- sys/compat/linux/linux_mib.c | 314 +++++++++++++-- sys/compat/linux/linux_misc.c | 14 + sys/compat/linux/linux_sysctl.c | 62 ++- sys/compat/linux/linux_time.c | 235 +++++++++-- sys/compat/linux/linux_uid16.c | 189 +++++++-- sys/compat/linux/linux_util.c | 124 +++++- sys/compat/linux/linux_util.h | 8 + sys/compat/linux/stats_timing.d | 94 +++++ sys/compat/linux/trace_futexes.d | 182 +++++++++ sys/i386/linux/linux.h | 1 + sys/i386/linux/linux_dummy.c | 9 + 20 files changed, 2082 insertions(+), 216 deletions(-) create mode 100644 sys/compat/linux/check_error.d create mode 100644 sys/compat/linux/check_internal_locks.d create mode 100644 sys/compat/linux/linux_dtrace.h create mode 100644 sys/compat/linux/stats_timing.d create mode 100644 sys/compat/linux/trace_futexes.d diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h index 4eb14250e59f..2c269d33e6ce 100644 --- a/sys/amd64/linux32/linux.h +++ b/sys/amd64/linux32/linux.h @@ -42,6 +42,7 @@ extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)td->td_proc->p_pid #define LMSG(fmt) "linux(%ld): "fmt"\n", (long)td->td_proc->p_pid +#define LINUX_DTRACE linuxulator32 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_LINUX); diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c index 9abc0ee4ac9d..95bf3ec88e9c 100644 --- a/sys/amd64/linux32/linux32_dummy.c +++ b/sys/amd64/linux32/linux32_dummy.c @@ -29,14 +29,23 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" +#include "opt_kdtrace.h" + 
#include +#include +#include #include #include #include #include +#include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + DUMMY(stime); DUMMY(olduname); DUMMY(syslog); diff --git a/sys/compat/linux/check_error.d b/sys/compat/linux/check_error.d new file mode 100644 index 000000000000..9e3c00a37dbe --- /dev/null +++ b/sys/compat/linux/check_error.d @@ -0,0 +1,144 @@ +#!/usr/sbin/dtrace -qs + +/*- + * Copyright (c) 2008-2012 Alexander Leidinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Report error conditions: + * - emulation errors (unsupportet stuff, unknown stuff, ...) + * - kernel errors (resource shortage, ...) 
+ * - programming errors (errors which can happen, but should not happen) + */ + +linuxulator*:dummy::not_implemented, +linuxulator*:emul:proc_exit:child_clear_tid_error, +linuxulator*:emul:proc_exit:futex_failed, +linuxulator*:emul:linux_schedtail:copyout_error, +linuxulator*:futex:futex_get:error, +linuxulator*:futex:futex_sleep:requeue_error, +linuxulator*:futex:futex_sleep:sleep_error, +linuxulator*:futex:futex_wait:copyin_error, +linuxulator*:futex:futex_wait:itimerfix_error, +linuxulator*:futex:futex_wait:sleep_error, +linuxulator*:futex:futex_atomic_op:missing_access_check, +linuxulator*:futex:futex_atomic_op:unimplemented_op, +linuxulator*:futex:futex_atomic_op:unimplemented_cmp, +linuxulator*:futex:linux_sys_futex:unimplemented_clockswitch, +linuxulator*:futex:linux_sys_futex:copyin_error, +linuxulator*:futex:linux_sys_futex:unhandled_efault, +linuxulator*:futex:linux_sys_futex:unimplemented_lock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_unlock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_trylock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_wait_requeue_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_cmp_requeue_pi, +linuxulator*:futex:linux_sys_futex:unknown_operation, +linuxulator*:futex:linux_get_robust_list:copyout_error, +linuxulator*:futex:handle_futex_death:copyin_error, +linuxulator*:futex:fetch_robust_entry:copyin_error, +linuxulator*:futex:release_futexes:copyin_error, +linuxulator*:time:linux_clock_gettime:conversion_error, +linuxulator*:time:linux_clock_gettime:gettime_error, +linuxulator*:time:linux_clock_gettime:copyout_error, +linuxulator*:time:linux_clock_settime:conversion_error, +linuxulator*:time:linux_clock_settime:settime_error, +linuxulator*:time:linux_clock_settime:copyin_error, +linuxulator*:time:linux_clock_getres:conversion_error, +linuxulator*:time:linux_clock_getres:getres_error, +linuxulator*:time:linux_clock_getres:copyout_error, +linuxulator*:time:linux_nanosleep:conversion_error, 
+linuxulator*:time:linux_nanosleep:nanosleep_error, +linuxulator*:time:linux_nanosleep:copyout_error, +linuxulator*:time:linux_nanosleep:copyin_error, +linuxulator*:time:linux_clock_nanosleep:copyin_error, +linuxulator*:time:linux_clock_nanosleep:conversion_error, +linuxulator*:time:linux_clock_nanosleep:copyout_error, +linuxulator*:time:linux_clock_nanosleep:nanosleep_error, +linuxulator*:sysctl:handle_string:copyout_error, +linuxulator*:sysctl:linux_sysctl:copyin_error, +linuxulator*:mib:linux_sysctl_osname:sysctl_string_error, +linuxulator*:mib:linux_sysctl_osrelease:sysctl_string_error, +linuxulator*:mib:linux_sysctl_oss_version:sysctl_string_error, +linuxulator*:mib:linux_prison_create:vfs_copyopt_error, +linuxulator*:mib:linux_prison_check:vfs_copyopt_error, +linuxulator*:mib:linux_prison_check:vfs_getopt_error, +linuxulator*:mib:linux_prison_set:vfs_copyopt_error, +linuxulator*:mib:linux_prison_set:vfs_getopt_error, +linuxulator*:mib:linux_prison_get:vfs_setopt_error, +linuxulator*:mib:linux_prison_get:vfs_setopts_error +{ + printf("ERROR: %s in %s:%s:%s\n", probename, probeprov, probemod, probefunc); + stack(); + ustack(); +} + +linuxulator*:util:linux_driver_get_name_dev:nullcall, +linuxulator*:util:linux_driver_get_major_minor:nullcall, +linuxulator*:futex:linux_sys_futex:invalid_cmp_requeue_use, +linuxulator*:futex:linux_sys_futex:deprecated_requeue, +linuxulator*:futex:linux_set_robust_list:size_error, +linuxulator*:time:linux_clock_getres:nullcall +{ + printf("WARNING: %s:%s:%s:%s in application %s, maybe an application error?\n", probename, probeprov, probemod, probefunc, execname); + stack(); + ustack(); +} + +linuxulator*:util:linux_driver_get_major_minor:notfound +{ + printf("WARNING: Application %s failed to find %s in %s:%s:%s, this may or may not be a problem.\n", execname, stringof(args[0]), probename, probeprov, probemod); + stack(); + ustack(); +} + +linuxulator*:time:linux_to_native_clockid:unknown_clockid +{ + printf("INFO: Application %s 
tried to use unknown clockid %d. Please report this to freebsd-emulation@FreeBSD.org.\n", execname, arg0); +} + +linuxulator*:time:linux_to_native_clockid:unsupported_clockid, +linuxulator*:time:linux_clock_nanosleep:unsupported_clockid +{ + printf("WARNING: Application %s tried to use unsupported clockid (%d), this may or may not be a problem for the application.\nPatches to support this clockid are welcome on the freebsd-emulation@FreeBSD.org mailinglist.\n", execname, arg0); +} + +linuxulator*:time:linux_clock_nanosleep:unsupported_flags +{ + printf("WARNING: Application %s tried to use unsupported flags (%d), this may or may not be a problem for the application.\nPatches to support those flags are welcome on the freebsd-emulation@FreeBSD.org mailinglist.\n", execname, arg0); +} + +linuxulator*:sysctl:linux_sysctl:wrong_length +{ + printf("ERROR: Application %s issued a sysctl which failed the length restrictions.\nThe length passed is %d, the min length supported is 1 and the max length supported is %d.\n", execname, arg0, arg1); + stack(); + ustack(); +} + +linuxulator*:sysctl:linux_sysctl:unsupported_sysctl +{ + printf("ERROR: Application %s issued an unsupported sysctl (%s).\nPatches to support this sysctl are welcome on the freebsd-emulation@FreeBSD.org mailinglist.\n", execname, stringof(args[0])); +} diff --git a/sys/compat/linux/check_internal_locks.d b/sys/compat/linux/check_internal_locks.d new file mode 100644 index 000000000000..2bdef684fadb --- /dev/null +++ b/sys/compat/linux/check_internal_locks.d @@ -0,0 +1,132 @@ +#!/usr/sbin/dtrace -qs + +/*- + * Copyright (c) 2008-2012 Alexander Leidinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/** + * Check if the internal locks are correctly acquired/released: + * - no recursive locking (mtx locks, write locks) + * - no unlocking of already unlocked one + * + * Print stacktrace if a lock is longer locked than about 10sec or more. + */ + +#pragma D option dynvarsize=32m +#pragma D option specsize=32m + +BEGIN +{ + check["emul_lock"] = 0; + check["emul_shared_rlock"] = 0; + check["emul_shared_wlock"] = 0; + check["futex_mtx"] = 0; +} + +linuxulator*:locks:emul_lock:locked, +linuxulator*:locks:emul_shared_wlock:locked, +linuxulator*:locks:futex_mtx:locked +/check[probefunc] > 0/ +{ + printf("ERROR: recursive lock of %s (%p),", probefunc, arg0); + printf(" or missing SDT probe in kernel. 
Stack trace follows:"); + stack(); +} + +linuxulator*:locks:emul_lock:locked, +linuxulator*:locks:emul_shared_rlock:locked, +linuxulator*:locks:emul_shared_wlock:locked, +linuxulator*:locks:futex_mtx:locked +{ + ++check[probefunc]; + @stats[probefunc] = count(); + + ts[probefunc] = timestamp; + spec[probefunc] = speculation(); +} + +linuxulator*:locks:emul_lock:unlock, +linuxulator*:locks:emul_shared_rlock:unlock, +linuxulator*:locks:emul_shared_wlock:unlock, +linuxulator*:locks:futex_mtx:unlock +/check[probefunc] == 0/ +{ + printf("ERROR: unlock attemt of unlocked %s (%p),", probefunc, arg0); + printf(" missing SDT probe in kernel, or dtrace program started"); + printf(" while the %s was already held (race condition).", probefunc); + printf(" Stack trace follows:"); + stack(); +} + +linuxulator*:locks:emul_lock:unlock, +linuxulator*:locks:emul_shared_rlock:unlock, +linuxulator*:locks:emul_shared_wlock:unlock, +linuxulator*:locks:futex_mtx:unlock +{ + discard(spec[probefunc]); + spec[probefunc] = 0; + --check[probefunc]; +} + +/* Timeout handling */ + +tick-10s +/spec["emul_lock"] != 0 && timestamp - ts["emul_lock"] >= 9999999000/ +{ + commit(spec["emul_lock"]); + spec["emul_lock"] = 0; +} + +tick-10s +/spec["emul_shared_wlock"] != 0 && timestamp - ts["emul_shared_wlock"] >= 9999999000/ +{ + commit(spec["emul_shared_wlock"]); + spec["emul_shared_wlock"] = 0; +} + +tick-10s +/spec["emul_shared_rlock"] != 0 && timestamp - ts["emul_shared_rlock"] >= 9999999000/ +{ + commit(spec["emul_shared_rlock"]); + spec["emul_shared_rlock"] = 0; +} + +tick-10s +/spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/ +{ + commit(spec["futex_mtx"]); + spec["futex_mtx"] = 0; +} + + +/* Statistics */ + +END +{ + printf("Number of locks per type:"); + printa(@stats); +} diff --git a/sys/compat/linux/linux_dtrace.h b/sys/compat/linux/linux_dtrace.h new file mode 100644 index 000000000000..b6a2b33f764f --- /dev/null +++ b/sys/compat/linux/linux_dtrace.h @@ -0,0 +1,95 @@ 
+/*- + * Copyright (c) 2008-2012 Alexander Leidinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_DTRACE_H_ +#define _LINUX_DTRACE_H_ + +/** + * DTrace support macros for the linuxulator. + * + * Some wrapper macros to make it more easy to handle the linuxulator + * providers and to allow to make the name depend upon the bitsize. + * + * Basically this is the same as the normal SDT macros in sys/sdt.h. The + * difference is that the provider name is automatically inserted, and + * we do not use a different name for the probe-description. 
+ */ + +#define LIN_SDT_PROVIDER_DEFINE(x) SDT_PROVIDER_DEFINE(x) +#define LIN_SDT_PROVIDER_DECLARE(x) SDT_PROVIDER_DECLARE(x) + +#define _LIN_SDT_PROBE_DECLARE(a, b, c, d) SDT_PROBE_DECLARE(a, b, c, d) +#define LIN_SDT_PROBE_DECLARE(a, b, c) _LIN_SDT_PROBE_DECLARE( \ + LINUX_DTRACE, a, b, c) + +#define _LIN_SDT_PROBE_DEFINE0(a, b, c, d) SDT_PROBE_DEFINE(a, \ + b, c, d, d) +#define LIN_SDT_PROBE_DEFINE0(a, b, c) _LIN_SDT_PROBE_DEFINE0(\ + LINUX_DTRACE, a, b, c) +#define _LIN_SDT_PROBE_DEFINE1(a, b, c, d, e) SDT_PROBE_DEFINE1(a, \ + b, c, d, d, e) +#define LIN_SDT_PROBE_DEFINE1(a, b, c, d) _LIN_SDT_PROBE_DEFINE1(\ + LINUX_DTRACE, a, b, c, d) +#define _LIN_SDT_PROBE_DEFINE2(a, b, c, d, e, f) SDT_PROBE_DEFINE2(a, \ + b, c, d, d, e, f) +#define LIN_SDT_PROBE_DEFINE2(a, b, c, d, e) _LIN_SDT_PROBE_DEFINE2(\ + LINUX_DTRACE, a, b, c, d, e) +#define _LIN_SDT_PROBE_DEFINE3(a, b, c, d, e, f, g) SDT_PROBE_DEFINE3(a, \ + b, c, d, d, e, f, g) +#define LIN_SDT_PROBE_DEFINE3(a, b, c, d, e, f) _LIN_SDT_PROBE_DEFINE3(\ + LINUX_DTRACE, a, b, c, d, e, f) +#define _LIN_SDT_PROBE_DEFINE4(a, b, c, d, e, f, g, h) SDT_PROBE_DEFINE4(a, \ + b, c, d, d, e, f, g, h) +#define LIN_SDT_PROBE_DEFINE4(a, b, c, d, e, f, g) _LIN_SDT_PROBE_DEFINE4(\ + LINUX_DTRACE, a, b, c, d, e, f, g) +#define _LIN_SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h, i) \ + SDT_PROBE_DEFINE5(a, b, c, d, d, e, f, g, h, i) +#define LIN_SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h) _LIN_SDT_PROBE_DEFINE5(\ + LINUX_DTRACE, a, b, c, d, e, f, g, h) + +#define _LIN_SDT_PROBE_ARGTYPE(a, b, c, d, e, f) SDT_PROBE_ARGTYPE(a, b,\ + c, d, e, f) +#define LIN_SDT_PROBE_ARGTYPE(a, b, c, d, e) _LIN_SDT_PROBE_ARGTYPE( \ + LINUX_DTRACE, a, b, c, d, e) + +#define LIN_SDT_PROBE0(a, b, c) SDT_PROBE1(LINUX_DTRACE, a, b, \ + c, 0) +#define LIN_SDT_PROBE1(a, b, c, d) SDT_PROBE1(LINUX_DTRACE, a, b, \ + c, d) +#define LIN_SDT_PROBE2(a, b, c, d, e) SDT_PROBE2(LINUX_DTRACE, a, b, \ + c, d, e) +#define LIN_SDT_PROBE3(a, b, c, d, e, f) SDT_PROBE3(LINUX_DTRACE, 
a, b, \ + c, d, e, f) +#define LIN_SDT_PROBE4(a, b, c, d, e, f, g) SDT_PROBE4(LINUX_DTRACE, a, b, \ + c, d, e, f, g) +#define _LIN_SDT_PROBE5(a, b, c, d, e, f, g, h, i) SDT_PROBE(a, b, c, d, \ + e, f, g, h, i) +#define LIN_SDT_PROBE5(a, b, c, d, e, f, g, h) _LIN_SDT_PROBE5(LINUX_DTRACE, \ + a, b, c, d, e, f, g, h) + +#endif /* _LINUX_DTRACE_H_ */ diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c index d6b2f71f5cef..6a1098d409f9 100644 --- a/sys/compat/linux/linux_emul.c +++ b/sys/compat/linux/linux_emul.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include @@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -53,9 +55,64 @@ __FBSDID("$FreeBSD$"); #include #endif +#include #include #include +/** + * Special DTrace provider for the linuxulator. + * + * In this file we define the provider for the entire linuxulator. All + * modules (= files of the linuxulator) use it. + * + * We define a different name depending on the emulated bitsize, see + * ../..//linux{,32}/linux.h, e.g.: + * native bitsize = linuxulator + * amd64, 32bit emulation = linuxulator32 + */ +LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); + +/** + * Special DTrace module "locks", it covers some linuxulator internal + * locks. + */ +LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *"); +LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *"); +LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *"); +LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *"); +LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *"); +LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *"); + +/** + * DTrace probes in this module. 
+ */ +LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int"); +LIN_SDT_PROBE_DEFINE1(emul, em_find, return, "struct linux_emuldata *"); +LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t", + "int"); +LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread); +LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork); +LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec); +LIN_SDT_PROBE_DEFINE0(emul, proc_init, return); +LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *"); +LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed); +LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t", + "struct proc *"); +LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int"); +LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return); +LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *", + "struct image_params *"); +LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return); +LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, entry, "struct proc *"); +LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return); +LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *"); +LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return); +LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *", + "int"); +LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t"); +LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return); + struct sx emul_shared_lock; struct mtx emul_lock; @@ -65,6 +122,8 @@ em_find(struct proc *p, int locked) { struct linux_emuldata *em; + LIN_SDT_PROBE2(emul, em_find, entry, p, locked); + if (locked == EMUL_DOLOCK) EMUL_LOCK(&emul_lock); @@ -73,6 +132,7 @@ em_find(struct proc *p, int locked) if (em == NULL && locked == EMUL_DOLOCK) EMUL_UNLOCK(&emul_lock); + LIN_SDT_PROBE1(emul, em_find, return, em); return (em); } @@ -82,8 +142,10 @@ linux_proc_init(struct thread *td, pid_t child, int flags) struct linux_emuldata *em, *p_em; struct proc *p; + LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags); 
+ if (child != 0) { - /* non-exec call */ + /* fork or create a thread */ em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO); em->pid = child; em->pdeath_signal = 0; @@ -91,9 +153,12 @@ linux_proc_init(struct thread *td, pid_t child, int flags) em->robust_futexes = NULL; if (flags & LINUX_CLONE_THREAD) { /* handled later in the code */ + LIN_SDT_PROBE0(emul, proc_init, create_thread); } else { struct linux_emuldata_shared *s; + LIN_SDT_PROBE0(emul, proc_init, fork); + s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO); s->refs = 1; s->group_pid = child; @@ -102,6 +167,9 @@ linux_proc_init(struct thread *td, pid_t child, int flags) em->shared = s; } } else { + /* exec */ + LIN_SDT_PROBE0(emul, proc_init, exec); + /* lookup the old one */ em = em_find(td->td_proc, EMUL_DOLOCK); KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); @@ -136,8 +204,7 @@ linux_proc_init(struct thread *td, pid_t child, int flags) * rwlock held */ } - } - if (child != 0) { + EMUL_SHARED_WLOCK(&emul_shared_lock); LIST_INSERT_HEAD(&em->shared->threads, em, threads); EMUL_SHARED_WUNLOCK(&emul_shared_lock); @@ -149,6 +216,7 @@ linux_proc_init(struct thread *td, pid_t child, int flags) } else EMUL_UNLOCK(&emul_lock); + LIN_SDT_PROBE0(emul, proc_init, return); return (0); } @@ -164,6 +232,8 @@ linux_proc_exit(void *arg __unused, struct proc *p) if (__predict_true(p->p_sysent != &elf_linux_sysvec)) return; + LIN_SDT_PROBE1(emul, proc_exit, entry, p); + release_futexes(p); /* find the emuldata */ @@ -173,6 +243,9 @@ linux_proc_exit(void *arg __unused, struct proc *p) /* reparent all procs that are not a thread leader to initproc */ if (em->shared->group_pid != p->p_pid) { + LIN_SDT_PROBE3(emul, proc_exit, reparent, + em->shared->group_pid, p->p_pid, p); + child_clear_tid = em->child_clear_tid; EMUL_UNLOCK(&emul_lock); sx_xlock(&proctree_lock); @@ -208,7 +281,12 @@ linux_proc_exit(void *arg __unused, struct proc *p) error = copyout(&null, child_clear_tid, sizeof(null)); if (error) 
{ + LIN_SDT_PROBE1(emul, proc_exit, + child_clear_tid_error, error); + free(em, M_LINUX); + + LIN_SDT_PROBE0(emul, proc_exit, return); return; } @@ -224,8 +302,10 @@ linux_proc_exit(void *arg __unused, struct proc *p) * this cannot happen at the moment and if this happens it * probably means there is a user space bug */ - if (error) + if (error) { + LIN_SDT_PROBE0(emul, proc_exit, futex_failed); printf(LMSG("futex stuff in proc_exit failed.\n")); + } } /* clean the stuff up */ @@ -250,6 +330,8 @@ linux_proc_exit(void *arg __unused, struct proc *p) EMUL_UNLOCK(&emul_lock); } sx_xunlock(&proctree_lock); + + LIN_SDT_PROBE0(emul, proc_exit, return); } /* @@ -260,6 +342,9 @@ linux_proc_exit(void *arg __unused, struct proc *p) void linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { + if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { + LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); + } if (__predict_false(imgp->sysent == &elf_linux_sysvec && p->p_sysent != &elf_linux_sysvec)) linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0); @@ -297,6 +382,10 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) free(em, M_LINUX); } + + if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { + LIN_SDT_PROBE0(emul, proc_exec, return); + } } void @@ -309,6 +398,8 @@ linux_schedtail(struct thread *td) p = td->td_proc; + LIN_SDT_PROBE1(emul, linux_schedtail, entry, p); + /* find the emuldata */ em = em_find(p, EMUL_DOLOCK); @@ -316,10 +407,18 @@ linux_schedtail(struct thread *td) child_set_tid = em->child_set_tid; EMUL_UNLOCK(&emul_lock); - if (child_set_tid != NULL) + if (child_set_tid != NULL) { error = copyout(&p->p_pid, (int *)child_set_tid, sizeof(p->p_pid)); + if (error != 0) { + LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error, + error); + } + } + + LIN_SDT_PROBE0(emul, linux_schedtail, return); + return; } @@ -328,10 +427,7 @@ linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args { 
struct linux_emuldata *em; -#ifdef DEBUG - if (ldebug(set_tid_address)) - printf(ARGS(set_tid_address, "%p"), args->tidptr); -#endif + LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr); /* find the emuldata */ em = em_find(td->td_proc, EMUL_DOLOCK); @@ -342,6 +438,8 @@ linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args td->td_retval[0] = td->td_proc->p_pid; EMUL_UNLOCK(&emul_lock); + + LIN_SDT_PROBE0(emul, linux_set_tid_address, return); return 0; } @@ -351,6 +449,8 @@ linux_kill_threads(struct thread *td, int sig) struct linux_emuldata *em, *td_em, *tmp_em; struct proc *sp; + LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig); + td_em = em_find(td->td_proc, EMUL_DONTLOCK); KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n")); @@ -364,9 +464,10 @@ linux_kill_threads(struct thread *td, int sig) if ((sp->p_flag & P_WEXIT) == 0) kern_psignal(sp, sig); PROC_UNLOCK(sp); -#ifdef DEBUG - printf(LMSG("linux_kill_threads: kill PID %d\n"), em->pid); -#endif + + LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid); } EMUL_SHARED_RUNLOCK(&emul_shared_lock); + + LIN_SDT_PROBE0(emul, linux_kill_threads, return); } diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h index 3acde64b68ce..f409a34da472 100644 --- a/sys/compat/linux/linux_emul.h +++ b/sys/compat/linux/linux_emul.h @@ -64,13 +64,42 @@ struct linux_emuldata { struct linux_emuldata *em_find(struct proc *, int locked); -#define EMUL_LOCK(l) mtx_lock(l) -#define EMUL_UNLOCK(l) mtx_unlock(l) +/* + * DTrace probes for locks should be fired after locking and before releasing + * to prevent races (to provide data/function stability in dtrace, see the + * output of "dtrace -v ..." and the corresponding dtrace docs). 
+ */ +#define EMUL_LOCK(l) do { \ + mtx_lock(l); \ + LIN_SDT_PROBE1(locks, emul_lock, \ + locked, l); \ + } while (0) +#define EMUL_UNLOCK(l) do { \ + LIN_SDT_PROBE1(locks, emul_lock, \ + unlock, l); \ + mtx_unlock(l); \ + } while (0) -#define EMUL_SHARED_RLOCK(l) sx_slock(l) -#define EMUL_SHARED_RUNLOCK(l) sx_sunlock(l) -#define EMUL_SHARED_WLOCK(l) sx_xlock(l) -#define EMUL_SHARED_WUNLOCK(l) sx_xunlock(l) +#define EMUL_SHARED_RLOCK(l) do { \ + sx_slock(l); \ + LIN_SDT_PROBE1(locks, emul_shared_rlock, \ + locked, l); \ + } while (0) +#define EMUL_SHARED_RUNLOCK(l) do { \ + LIN_SDT_PROBE1(locks, emul_shared_rlock, \ + unlock, l); \ + sx_sunlock(l); \ + } while (0) +#define EMUL_SHARED_WLOCK(l) do { \ + sx_xlock(l); \ + LIN_SDT_PROBE1(locks, emul_shared_wlock, \ + locked, l); \ + } while (0) +#define EMUL_SHARED_WUNLOCK(l) do { \ + LIN_SDT_PROBE1(locks, emul_shared_wlock, \ + unlock, l); \ + sx_xunlock(l); \ + } while (0) /* for em_find use */ #define EMUL_DOLOCK 1 diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index 5d2ce5bdb0cb..f71063ca3a91 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include @@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -48,9 +50,17 @@ __FBSDID("$FreeBSD$"); #include #include #endif +#include #include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/* Linuxulator-global DTrace probes */ +LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); +LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); + int linux_fork(struct thread *td, struct linux_fork_args *args) diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c index 44d68f4260e2..c87fd00521c3 100644 --- a/sys/compat/linux/linux_futex.c +++ b/sys/compat/linux/linux_futex.c @@ -38,6 +38,7 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 
2006/07/24 19:01:49 manu Exp $") #endif #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include @@ -51,6 +52,7 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $") #include #include #include +#include #include #include @@ -61,10 +63,131 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $") #include #include #endif +#include #include #include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/* Linuxulator-global DTrace probes */ +LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); +LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); + +/** + * Futex part for the special DTrace module "locks". + */ +LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); +LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); + +/** + * Per futex probes. + */ +LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); +LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); + +/** + * DTrace probes in this module. 
+ */ +LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", + "struct waiting_proc *"); +LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", + "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", + "int"); +LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); +LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", + "uint32_t"); +LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", + "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); +LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", + "struct waiting_proc **", "struct futex **"); +LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); +LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", + "struct waiting_proc **", "int"); +LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", + "struct waiting_proc *", "uint32_t *", "uint32_t"); +LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", + "struct waiting_proc *"); +LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", + "uint32_t"); +LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", + "struct waiting_proc *", "uint32_t"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); +LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", + "struct futex *", "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); +LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", + "struct waiting_proc *", 
"uint32_t"); +LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); +LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", + "struct waiting_proc **", "struct l_timespec *", "uint32_t"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wait, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wait, itimerfix_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", + "int", "uint32_t"); +LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", + "int"); +LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); +LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); +LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); +LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", + "struct linux_sys_futex_args *"); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); +LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); +LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", + "uint32_t", "uint32_t"); +LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, + "uint32_t *", "uint32_t", "int", "uint32_t"); +LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", + "uint32_t", "uint32_t"); +LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", + "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, + "uint32_t", "int"); +LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", + "int", "uint32_t", "uint32_t *", "uint32_t"); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 
+LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); +LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); +LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); +LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); +LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", + "struct linux_set_robust_list_args *"); +LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); +LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); +LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", + "struct linux_get_robust_list_args *"); +LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct proc *", + "uint32_t *", "int"); +LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); +LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, + "struct linux_robust_list **", "struct linux_robust_list **", "int *"); +LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); +LIN_SDT_PROBE_DEFINE1(futex, release_futexes, entry, "struct proc *"); +LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); + static MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes"); static MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp"); @@ -90,13 +213,30 @@ struct futex_list futex_list; #define 
FUTEX_LOCK(f) sx_xlock(&(f)->f_lck) #define FUTEX_UNLOCK(f) sx_xunlock(&(f)->f_lck) -#define FUTEX_INIT(f) sx_init_flags(&(f)->f_lck, "ftlk", SX_DUPOK) -#define FUTEX_DESTROY(f) sx_destroy(&(f)->f_lck) +#define FUTEX_INIT(f) do { \ + sx_init_flags(&(f)->f_lck, "ftlk", \ + SX_DUPOK); \ + LIN_SDT_PROBE1(futex, futex, create, \ + &(f)->f_lck); \ + } while (0) +#define FUTEX_DESTROY(f) do { \ + LIN_SDT_PROBE1(futex, futex, destroy, \ + &(f)->f_lck); \ + sx_destroy(&(f)->f_lck); \ + } while (0) #define FUTEX_ASSERT_LOCKED(f) sx_assert(&(f)->f_lck, SA_XLOCKED) struct mtx futex_mtx; /* protects the futex list */ -#define FUTEXES_LOCK mtx_lock(&futex_mtx) -#define FUTEXES_UNLOCK mtx_unlock(&futex_mtx) +#define FUTEXES_LOCK do { \ + mtx_lock(&futex_mtx); \ + LIN_SDT_PROBE1(locks, futex_mtx, \ + locked, &futex_mtx); \ + } while (0) +#define FUTEXES_UNLOCK do { \ + LIN_SDT_PROBE1(locks, futex_mtx, \ + unlock, &futex_mtx); \ + mtx_unlock(&futex_mtx); \ + } while (0) /* flags for futex_get() */ #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ @@ -123,6 +263,7 @@ int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); static void futex_put(struct futex *f, struct waiting_proc *wp) { + LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); FUTEX_ASSERT_LOCKED(f); if (wp != NULL) { @@ -137,18 +278,26 @@ futex_put(struct futex *f, struct waiting_proc *wp) FUTEXES_UNLOCK; FUTEX_UNLOCK(f); + LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, + f->f_refcount, f->f_key.shared); LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); umtx_key_release(&f->f_key); FUTEX_DESTROY(f); free(f, M_FUTEX); + + LIN_SDT_PROBE0(futex, futex_put, return); return; } + LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, + f->f_key.shared); LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); FUTEXES_UNLOCK; FUTEX_UNLOCK(f); + + LIN_SDT_PROBE0(futex, futex_put, return); } 
static int @@ -158,12 +307,17 @@ futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) struct umtx_key key; int error; + LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); + *newf = tmpf = NULL; error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE, &key); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); + LIN_SDT_PROBE1(futex, futex_get0, return, error); return (error); + } retry: FUTEXES_LOCK; LIST_FOREACH(f, &futex_list, f_list) { @@ -176,6 +330,9 @@ futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) if (flags & FUTEX_DONTEXISTS) { FUTEXES_UNLOCK; umtx_key_release(&key); + + LIN_SDT_PROBE1(futex, futex_get0, return, + EINVAL); return (EINVAL); } @@ -189,8 +346,12 @@ futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) FUTEX_LOCK(f); *newf = f; + LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, + f->f_refcount, f->f_key.shared); LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", uaddr, f->f_refcount, f->f_key.shared); + + LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } } @@ -198,7 +359,10 @@ futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) if (flags & FUTEX_DONTCREATE) { FUTEXES_UNLOCK; umtx_key_release(&key); + LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); + + LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } @@ -223,9 +387,13 @@ futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) LIST_INSERT_HEAD(&futex_list, tmpf, f_list); FUTEXES_UNLOCK; + LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, + tmpf->f_key.shared); LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", uaddr, tmpf->f_refcount, tmpf->f_key.shared); *newf = tmpf; + + LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } @@ -235,14 +403,20 @@ futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, { int error; + 
LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); + if (flags & FUTEX_CREATE_WP) { *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); (*wp)->wp_flags = 0; } error = futex_get0(uaddr, f, flags); if (error) { + LIN_SDT_PROBE0(futex, futex_get, error); + if (flags & FUTEX_CREATE_WP) free(*wp, M_FUTEX_WP); + + LIN_SDT_PROBE1(futex, futex_get, return, error); return (error); } if (flags & FUTEX_CREATE_WP) { @@ -250,6 +424,7 @@ futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, (*wp)->wp_futex = *f; } + LIN_SDT_PROBE1(futex, futex_get, return, error); return (error); } @@ -259,23 +434,38 @@ futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout) int error; FUTEX_ASSERT_LOCKED(f); + LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout); LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d", f->f_uaddr, wp, timeout, f->f_refcount); error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout); if (wp->wp_flags & FUTEX_WP_REQUEUED) { KASSERT(f != wp->wp_futex, ("futex != wp_futex")); - LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p w" + + if (error) { + LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, + f->f_uaddr, wp, wp->wp_futex->f_uaddr, + wp->wp_futex->f_refcount); + } + + LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" " %p requeued uaddr %p ref %d", error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, wp->wp_futex->f_refcount); futex_put(f, NULL); f = wp->wp_futex; FUTEX_LOCK(f); - } else + } else { + if (error) { + LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, + f->f_uaddr, wp); + } LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", error, f->f_uaddr, wp); + } futex_put(f, wp); + + LIN_SDT_PROBE1(futex, futex_sleep, return, error); return (error); } @@ -285,11 +475,17 @@ futex_wake(struct futex *f, int n, uint32_t bitset) struct waiting_proc *wp, *wpt; int count = 0; - if (bitset == 0) + LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); + + if 
(bitset == 0) { + LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); return (EINVAL); + } FUTEX_ASSERT_LOCKED(f); TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { + LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, + f->f_refcount); LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", f->f_uaddr, wp, f->f_refcount); /* @@ -301,11 +497,13 @@ futex_wake(struct futex *f, int n, uint32_t bitset) wp->wp_flags |= FUTEX_WP_REMOVED; TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); wakeup_one(wp); if (++count == n) break; } + LIN_SDT_PROBE1(futex, futex_wake, return, count); return (count); } @@ -315,6 +513,8 @@ futex_requeue(struct futex *f, int n, struct futex *f2, int n2) struct waiting_proc *wp, *wpt; int count = 0; + LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); + FUTEX_ASSERT_LOCKED(f); FUTEX_ASSERT_LOCKED(f2); @@ -324,8 +524,11 @@ futex_requeue(struct futex *f, int n, struct futex *f2, int n2) f->f_uaddr, wp); wp->wp_flags |= FUTEX_WP_REMOVED; TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); wakeup_one(wp); } else { + LIN_SDT_PROBE3(futex, futex_requeue, requeue, + f->f_uaddr, wp, f2->f_uaddr); LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", f->f_uaddr, wp, f2->f_uaddr); wp->wp_flags |= FUTEX_WP_REQUEUED; @@ -347,6 +550,7 @@ futex_requeue(struct futex *f, int n, struct futex *f2, int n2) } } + LIN_SDT_PROBE1(futex, futex_requeue, return, count); return (count); } @@ -359,26 +563,42 @@ futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts, int timeout_hz; int error; - if (bitset == 0) + LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); + + if (bitset == 0) { + LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); return (EINVAL); + } + f->f_bitset = bitset; if (ts != NULL) { error = copyin(ts, &timeout, sizeof(timeout)); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, futex_wait, 
copyin_error, error); + LIN_SDT_PROBE1(futex, futex_wait, return, error); return (error); + } TIMESPEC_TO_TIMEVAL(&tv, &timeout); error = itimerfix(&tv); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, futex_wait, itimerfix_error, + error); + LIN_SDT_PROBE1(futex, futex_wait, return, error); return (error); + } timeout_hz = tvtohz(&tv); } else timeout_hz = 0; error = futex_sleep(f, wp, timeout_hz); + if (error) { + LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); + } if (error == EWOULDBLOCK) error = ETIMEDOUT; + LIN_SDT_PROBE1(futex, futex_wait, return, error); return (error); } @@ -391,16 +611,16 @@ futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret; + LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; -#ifdef DEBUG - if (ldebug(sys_futex)) - printf("futex_atomic_op: op = %d, cmp = %d, oparg = %x, " - "cmparg = %x, uaddr = %p\n", - op, cmp, oparg, cmparg, uaddr); -#endif + LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, + cmparg); + /* XXX: Linux verifies access here and returns EFAULT */ + LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); switch (op) { case FUTEX_OP_SET: @@ -419,29 +639,42 @@ futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) ret = futex_xorl(oparg, uaddr, &oldval); break; default: + LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); ret = -ENOSYS; break; } - if (ret) + if (ret) { + LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); return (ret); + } switch (cmp) { case FUTEX_OP_CMP_EQ: - return (oldval == cmparg); + ret = (oldval == cmparg); + break; case FUTEX_OP_CMP_NE: - return (oldval != cmparg); + ret = (oldval != cmparg); + break; case FUTEX_OP_CMP_LT: - return (oldval < cmparg); + ret = (oldval < cmparg); + break; case FUTEX_OP_CMP_GE: - return (oldval >= cmparg); + ret = (oldval >= cmparg); + break; case 
FUTEX_OP_CMP_LE: - return (oldval <= cmparg); + ret = (oldval <= cmparg); + break; case FUTEX_OP_CMP_GT: - return (oldval > cmparg); + ret = (oldval > cmparg); + break; default: - return (-ENOSYS); + LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); + ret = -ENOSYS; } + + LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); + return (ret); } int @@ -454,6 +687,8 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) int error; uint32_t flags; + LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); + if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { flags = 0; args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; @@ -469,8 +704,12 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && - args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) + args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_clockswitch); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); + } error = 0; f = f2 = NULL; @@ -481,31 +720,40 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) /* FALLTHROUGH */ case LINUX_FUTEX_WAIT_BITSET: - + LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, + args->val, args->val3); LINUX_CTR3(sys_futex, "WAIT uaddr %p val %d val3 %d", args->uaddr, args->val, args->val3); -#ifdef DEBUG - if (ldebug(sys_futex)) - printf(ARGS(sys_futex, - "futex_wait uaddr %p val %d val3 %d"), - args->uaddr, args->val, args->val3); -#endif + error = futex_get(args->uaddr, &wp, &f, flags | FUTEX_CREATE_WP); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); + } + error = copyin(args->uaddr, &val, sizeof(val)); if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, + error); LINUX_CTR1(sys_futex, "WAIT copyin failed %d", error); futex_put(f, wp); + + 
LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (val != args->val) { + LIN_SDT_PROBE4(futex, linux_sys_futex, + debug_wait_value_neq, args->uaddr, args->val, val, + args->val3); LINUX_CTR4(sys_futex, "WAIT uaddr %p val %d != uval %d val3 %d", args->uaddr, args->val, val, args->val3); futex_put(f, wp); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, + EWOULDBLOCK); return (EWOULDBLOCK); } @@ -517,21 +765,22 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) /* FALLTHROUGH */ case LINUX_FUTEX_WAKE_BITSET: - + LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, + args->val, args->val3); LINUX_CTR3(sys_futex, "WAKE uaddr %p val % d val3 %d", args->uaddr, args->val, args->val3); -#ifdef DEBUG - if (ldebug(sys_futex)) - printf(ARGS(sys_futex, "futex_wake uaddr %p val %d val3 %d"), - args->uaddr, args->val, args->val3); -#endif error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTCREATE); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); + } + if (f == NULL) { td->td_retval[0] = 0; + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } td->td_retval[0] = futex_wake(f, args->val, args->val3); @@ -539,29 +788,30 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) break; case LINUX_FUTEX_CMP_REQUEUE: - + LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, + args->uaddr, args->val, args->val3, args->uaddr2, + args->timeout); LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " "val %d val3 %d uaddr2 %p val2 %d", args->uaddr, args->val, args->val3, args->uaddr2, (int)(unsigned long)args->timeout); -#ifdef DEBUG - if (ldebug(sys_futex)) - printf(ARGS(sys_futex, "futex_cmp_requeue uaddr %p " - "val %d val3 %d uaddr2 %p val2 %d"), - args->uaddr, args->val, args->val3, args->uaddr2, - (int)(unsigned long)args->timeout); -#endif - /* * Linux allows this, we would not, it is an incorrect * usage of declared ABI, so return 
EINVAL. */ - if (args->uaddr == args->uaddr2) + if (args->uaddr == args->uaddr2) { + LIN_SDT_PROBE0(futex, linux_sys_futex, + invalid_cmp_requeue_use); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); return (EINVAL); + } + error = futex_get(args->uaddr, NULL, &f, flags); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); + } /* * To avoid deadlocks return EINVAL if second futex @@ -574,21 +824,31 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) flags | FUTEX_DONTEXISTS); if (error) { futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } error = copyin(args->uaddr, &val, sizeof(val)); if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, + error); LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", error); futex_put(f2, NULL); futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (val != args->val3) { + LIN_SDT_PROBE2(futex, linux_sys_futex, + debug_cmp_requeue_value_neq, args->val, val); LINUX_CTR2(sys_futex, "CMP_REQUEUE val %d != uval %d", args->val, val); futex_put(f2, NULL); futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); return (EAGAIN); } @@ -599,26 +859,25 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) break; case LINUX_FUTEX_WAKE_OP: - + LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, + args->uaddr, args->op, args->val, args->uaddr2, args->val3); LINUX_CTR5(sys_futex, "WAKE_OP " "uaddr %p op %d val %x uaddr2 %p val3 %x", args->uaddr, args->op, args->val, args->uaddr2, args->val3); -#ifdef DEBUG - if (ldebug(sys_futex)) - printf(ARGS(sys_futex, "futex_wake_op " - "uaddr %p op %d val %x uaddr2 %p val3 %x"), - args->uaddr, args->op, args->val, - args->uaddr2, args->val3); -#endif error = futex_get(args->uaddr, NULL, &f, flags); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 
return (error); + } + if (args->uaddr != args->uaddr2) error = futex_get(args->uaddr2, NULL, &f2, flags); if (error) { futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } @@ -634,11 +893,19 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) if (f2 != NULL) futex_put(f2, NULL); futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, + -op_ret); return (-op_ret); + } else { + LIN_SDT_PROBE0(futex, linux_sys_futex, + unhandled_efault); } if (f2 != NULL) futex_put(f2, NULL); futex_put(f, NULL); + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT); return (EFAULT); } @@ -666,6 +933,8 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) linux_msg(td, "linux_sys_futex: " "op LINUX_FUTEX_LOCK_PI not implemented\n"); + LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_UNLOCK_PI: @@ -673,6 +942,8 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) linux_msg(td, "linux_sys_futex: " "op LINUX_FUTEX_UNLOCK_PI not implemented\n"); + LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_TRYLOCK_PI: @@ -680,6 +951,9 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) linux_msg(td, "linux_sys_futex: " "op LINUX_FUTEX_TRYLOCK_PI not implemented\n"); + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_trylock_pi); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_REQUEUE: @@ -696,7 +970,11 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) "linux_sys_futex: " "unsupported futex_requeue op\n"); em->flags |= LINUX_XDEPR_REQUEUEOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + deprecated_requeue); } + + LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); return 
(EINVAL); case LINUX_FUTEX_WAIT_REQUEUE_PI: @@ -704,6 +982,9 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) linux_msg(td, "linux_sys_futex: " "op FUTEX_WAIT_REQUEUE_PI not implemented\n"); + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_wait_requeue_pi); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_CMP_REQUEUE_PI: @@ -711,14 +992,21 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) linux_msg(td, "linux_sys_futex: " "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n"); + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_cmp_requeue_pi); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); default: linux_msg(td, "linux_sys_futex: unknown op %d\n", args->op); + LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, + args->op); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); } + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } @@ -727,19 +1015,19 @@ linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args { struct linux_emuldata *em; -#ifdef DEBUG - if (ldebug(set_robust_list)) - printf(ARGS(set_robust_list, "head %p len %d"), - args->head, args->len); -#endif + LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); - if (args->len != sizeof(struct linux_robust_list_head)) + if (args->len != sizeof(struct linux_robust_list_head)) { + LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); + LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); return (EINVAL); + } em = em_find(td->td_proc, EMUL_DOLOCK); em->robust_futexes = args->head; EMUL_UNLOCK(&emul_lock); + LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); return (0); } @@ -751,10 +1039,7 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args l_size_t len = sizeof(struct linux_robust_list_head); int error = 0; -#ifdef DEBUG - if (ldebug(get_robust_list)) - 
printf(ARGS(get_robust_list, "")); -#endif + LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); if (!args->pid) { em = em_find(td->td_proc, EMUL_DONTLOCK); @@ -763,8 +1048,11 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args struct proc *p; p = pfind(args->pid); - if (p == NULL) + if (p == NULL) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, + ESRCH); return (ESRCH); + } em = em_find(p, EMUL_DONTLOCK); /* XXX: ptrace? */ @@ -772,6 +1060,9 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args priv_check(td, PRIV_CRED_SETEUID) || p_candebug(td, p)) { PROC_UNLOCK(p); + + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, + EPERM); return (EPERM); } head = em->robust_futexes; @@ -780,11 +1071,20 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args } error = copyout(&len, args->len, sizeof(l_size_t)); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, + error); + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); return (EFAULT); + } error = copyout(head, args->head, sizeof(struct linux_robust_list_head)); + if (error) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, + error); + } + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); return (error); } @@ -795,15 +1095,24 @@ handle_futex_death(struct proc *p, uint32_t *uaddr, int pi) struct futex *f; int error; + LIN_SDT_PROBE3(futex, handle_futex_death, entry, p, uaddr, pi); + retry: - if (copyin(uaddr, &uval, 4)) + error = copyin(uaddr, &uval, 4); + if (error) { + LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); + LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); return (EFAULT); + } if ((uval & FUTEX_TID_MASK) == p->p_pid) { mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; nval = casuword32(uaddr, uval, mval); - if (nval == -1) + if (nval == -1) { + LIN_SDT_PROBE1(futex, handle_futex_death, return, + 
EFAULT); return (EFAULT); + } if (nval != uval) goto retry; @@ -811,8 +1120,11 @@ handle_futex_death(struct proc *p, uint32_t *uaddr, int pi) if (!pi && (uval & FUTEX_WAITERS)) { error = futex_get(uaddr, NULL, &f, FUTEX_DONTCREATE | FUTEX_SHARED); - if (error) + if (error) { + LIN_SDT_PROBE1(futex, handle_futex_death, + return, error); return (error); + } if (f != NULL) { futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); futex_put(f, NULL); @@ -820,6 +1132,7 @@ handle_futex_death(struct proc *p, uint32_t *uaddr, int pi) } } + LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); return (0); } @@ -828,13 +1141,21 @@ fetch_robust_entry(struct linux_robust_list **entry, struct linux_robust_list **head, int *pi) { l_ulong uentry; + int error; - if (copyin((const void *)head, &uentry, sizeof(l_ulong))) + LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); + + error = copyin((const void *)head, &uentry, sizeof(l_ulong)); + if (error) { + LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); + LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); return (EFAULT); + } *entry = (void *)(uentry & ~1UL); *pi = uentry & 1; + LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); return (0); } @@ -847,31 +1168,49 @@ release_futexes(struct proc *p) unsigned int limit = 2048, pi, next_pi, pip; struct linux_emuldata *em; l_long futex_offset; - int rc; + int rc, error; + + LIN_SDT_PROBE1(futex, release_futexes, entry, p); em = em_find(p, EMUL_DONTLOCK); head = em->robust_futexes; - if (head == NULL) + if (head == NULL) { + LIN_SDT_PROBE0(futex, release_futexes, return); return; + } - if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) + if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { + LIN_SDT_PROBE0(futex, release_futexes, return); return; + } - if (copyin(&head->futex_offset, &futex_offset, sizeof(futex_offset))) + error = copyin(&head->futex_offset, &futex_offset, + sizeof(futex_offset)); + if (error) { + LIN_SDT_PROBE1(futex, 
release_futexes, copyin_error, error); + LIN_SDT_PROBE0(futex, release_futexes, return); return; + } - if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) + if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { + LIN_SDT_PROBE0(futex, release_futexes, return); return; + } while (entry != &head->list) { rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); if (entry != pending) - if (handle_futex_death(p, (uint32_t *)entry + futex_offset, pi)) + if (handle_futex_death(p, + (uint32_t *)entry + futex_offset, pi)) { + LIN_SDT_PROBE0(futex, release_futexes, return); return; - if (rc) + } + if (rc) { + LIN_SDT_PROBE0(futex, release_futexes, return); return; + } entry = next_entry; pi = next_pi; @@ -884,4 +1223,6 @@ release_futexes(struct proc *p) if (pending) handle_futex_death(p, (uint32_t *)pending + futex_offset, pip); + + LIN_SDT_PROBE0(futex, release_futexes, return); } diff --git a/sys/compat/linux/linux_mib.c b/sys/compat/linux/linux_mib.c index 16f9ac3c2d86..08566623f094 100644 --- a/sys/compat/linux/linux_mib.c +++ b/sys/compat/linux/linux_mib.c @@ -29,8 +29,12 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" +#include "opt_kdtrace.h" + #include #include +#include #include #include #include @@ -41,15 +45,81 @@ __FBSDID("$FreeBSD$"); #include #include -#include "opt_compat.h" - #ifdef COMPAT_LINUX32 #include #else #include #endif +#include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/** + * DTrace probes in this module. 
+ */ +LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osname, entry); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, sysctl_string_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, return, "int"); + +LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osrelease, entry); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, sysctl_string_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, return, "int"); +LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_oss_version, entry); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, sysctl_string_error, + "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_map_osrel, entry, "char *", "int *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_map_osrel, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_get_prison, entry, "struct prison *", + "struct prison **"); +LIN_SDT_PROBE_DEFINE1(mib, linux_get_prison, return, "struct linux_prison *"); +LIN_SDT_PROBE_DEFINE2(mib, linux_alloc_prison, entry, "struct prison *", + "struct linux_prison **"); +LIN_SDT_PROBE_DEFINE1(mib, linux_alloc_prison, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_prison_create, entry, "void *", "void *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, vfs_copyopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_prison_check, entry, "void *", "void *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_copyopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_getopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_prison_set, entry, "void *", "void *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_copyopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_getopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_prison_get, entry, "void *", "void *"); +LIN_SDT_PROBE_DEFINE1(mib, 
linux_prison_get, vfs_setopt_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, vfs_setopts_error, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, return, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_prison_destructor, entry, "void *"); +LIN_SDT_PROBE_DEFINE0(mib, linux_prison_destructor, return); +LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, entry); +LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, return); +LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, entry); +LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, return); +LIN_SDT_PROBE_DEFINE2(mib, linux_get_osname, entry, "struct thread *", + "char *"); +LIN_SDT_PROBE_DEFINE0(mib, linux_get_osname, return); +LIN_SDT_PROBE_DEFINE2(mib, linux_set_osname, entry, "struct thread *", + "char *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_set_osname, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_get_osrelease, entry, "struct thread *", + "char *"); +LIN_SDT_PROBE_DEFINE0(mib, linux_get_osrelease, return); +LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, entry, "struct thread *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, return, "int"); +LIN_SDT_PROBE_DEFINE2(mib, linux_set_osrelease, entry, "struct thread *", + "char *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_set_osrelease, return, "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, entry, "struct thread *"); +LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, return, "int"); + +LIN_SDT_PROBE_DEFINE2(mib, linux_set_oss_version, entry, "struct thread *", + "int"); +LIN_SDT_PROBE_DEFINE1(mib, linux_set_oss_version, return, "int"); + struct linux_prison { char pr_osname[LINUX_MAX_UTSNAME]; char pr_osrelease[LINUX_MAX_UTSNAME]; @@ -79,11 +149,19 @@ linux_sysctl_osname(SYSCTL_HANDLER_ARGS) char osname[LINUX_MAX_UTSNAME]; int error; + LIN_SDT_PROBE0(mib, linux_sysctl_osname, entry); + linux_get_osname(req->td, osname); error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); - if (error || req->newptr == NULL) + if (error != 0 || 
req->newptr == NULL) { + LIN_SDT_PROBE1(mib, linux_sysctl_osname, sysctl_string_error, + error); + LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error); return (error); + } error = linux_set_osname(req->td, osname); + + LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error); return (error); } @@ -98,11 +176,19 @@ linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) char osrelease[LINUX_MAX_UTSNAME]; int error; + LIN_SDT_PROBE0(mib, linux_sysctl_osrelease, entry); + linux_get_osrelease(req->td, osrelease); error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); - if (error || req->newptr == NULL) + if (error != 0 || req->newptr == NULL) { + LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, sysctl_string_error, + error); + LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error); return (error); + } error = linux_set_osrelease(req->td, osrelease); + + LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error); return (error); } @@ -117,11 +203,19 @@ linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) int oss_version; int error; + LIN_SDT_PROBE0(mib, linux_sysctl_oss_version, entry); + oss_version = linux_get_oss_version(req->td); error = sysctl_handle_int(oidp, &oss_version, 0, req); - if (error || req->newptr == NULL) + if (error != 0 || req->newptr == NULL) { + LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, + sysctl_string_error, error); + LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error); return (error); + } error = linux_set_oss_version(req->td, oss_version); + + LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error); return (error); } @@ -139,25 +233,37 @@ linux_map_osrel(char *osrelease, int *osrel) char *sep, *eosrelease; int len, v0, v1, v2, v; + LIN_SDT_PROBE2(mib, linux_map_osrel, entry, osrelease, osrel); + len = strlen(osrelease); eosrelease = osrelease + len; v0 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') + if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') { + 
LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); return (EINVAL); + } osrelease = sep + 1; v1 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') + if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') { + LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); return (EINVAL); + } osrelease = sep + 1; v2 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep != eosrelease) + if (osrelease == sep || sep != eosrelease) { + LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); return (EINVAL); + } v = v0 * 1000000 + v1 * 1000 + v2; - if (v < 1000000) + if (v < 1000000) { + LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); return (EINVAL); + } *osrel = v; + + LIN_SDT_PROBE1(mib, linux_map_osrel, return, 0); return (0); } @@ -171,6 +277,8 @@ linux_find_prison(struct prison *spr, struct prison **prp) struct prison *pr; struct linux_prison *lpr; + LIN_SDT_PROBE2(mib, linux_get_prison, entry, spr, prp); + if (!linux_osd_jail_slot) /* In case osd_register failed. */ spr = &prison0; @@ -184,6 +292,8 @@ linux_find_prison(struct prison *spr, struct prison **prp) mtx_unlock(&pr->pr_mtx); } *prp = pr; + + LIN_SDT_PROBE1(mib, linux_get_prison, return, lpr); return (lpr); } @@ -198,6 +308,8 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) struct linux_prison *lpr, *nlpr; int error; + LIN_SDT_PROBE2(mib, linux_alloc_prison, entry, pr, lprp); + /* If this prison already has Linux info, return that. 
*/ error = 0; lpr = linux_find_prison(pr, &ppr); @@ -230,6 +342,8 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) *lprp = lpr; else mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_alloc_prison, return, error); return (error); } @@ -241,16 +355,26 @@ linux_prison_create(void *obj, void *data) { struct prison *pr = obj; struct vfsoptlist *opts = data; - int jsys; + int jsys, error; - if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && - jsys == JAIL_SYS_INHERIT) + LIN_SDT_PROBE2(mib, linux_prison_create, entry, obj, data); + + error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_create, vfs_copyopt_error, + error); + } else if (jsys == JAIL_SYS_INHERIT) { + LIN_SDT_PROBE1(mib, linux_prison_create, return, 0); return (0); + } /* * Inherit a prison's initial values from its parent * (different from JAIL_SYS_INHERIT which also inherits changes). */ - return linux_alloc_prison(pr, NULL); + error = linux_alloc_prison(pr, NULL); + + LIN_SDT_PROBE1(mib, linux_prison_create, return, error); + return (error); } static int @@ -260,44 +384,81 @@ linux_prison_check(void *obj __unused, void *data) char *osname, *osrelease; int error, jsys, len, osrel, oss_version; + LIN_SDT_PROBE2(mib, linux_prison_check, entry, obj, data); + /* Check that the parameters are correct. 
*/ error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error, + error); + } if (error != ENOENT) { - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); - if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) + } + if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) { + LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); return (EINVAL); + } } error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error, + error); + } if (error != ENOENT) { - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); - if (len == 0 || osname[len - 1] != '\0') + } + if (len == 0 || osname[len - 1] != '\0') { + LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); return (EINVAL); + } if (len > LINUX_MAX_UTSNAME) { vfs_opterror(opts, "linux.osname too long"); + LIN_SDT_PROBE1(mib, linux_prison_check, return, + ENAMETOOLONG); return (ENAMETOOLONG); } } error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error, + error); + } if (error != ENOENT) { - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); - if (len == 0 || osrelease[len - 1] != '\0') + } + if (len == 0 || osrelease[len - 1] != '\0') { + LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); return (EINVAL); + } if (len > LINUX_MAX_UTSNAME) { vfs_opterror(opts, "linux.osrelease too long"); + LIN_SDT_PROBE1(mib, linux_prison_check, return, + ENAMETOOLONG); return (ENAMETOOLONG); } error = linux_map_osrel(osrelease, &osrel); if (error != 0) { vfs_opterror(opts, "linux.osrelease format error"); + LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); } } error = vfs_copyopt(opts, 
"linux.oss_version", &oss_version, sizeof(oss_version)); - return (error == ENOENT ? 0 : error); + if (error != 0) + LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error, error); + + if (error == ENOENT) + error = 0; + LIN_SDT_PROBE1(mib, linux_prison_check, return, error); + return (error); } static int @@ -309,22 +470,32 @@ linux_prison_set(void *obj, void *data) char *osname, *osrelease; int error, gotversion, jsys, len, oss_version; + LIN_SDT_PROBE2(mib, linux_prison_set, entry, obj, data); + /* Set the parameters, which should be correct. */ error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); + if (error != 0) + LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error); if (error == ENOENT) jsys = -1; error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); + if (error != 0) + LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error); if (error == ENOENT) osname = NULL; else jsys = JAIL_SYS_NEW; error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); + if (error != 0) + LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error); if (error == ENOENT) osrelease = NULL; else jsys = JAIL_SYS_NEW; error = vfs_copyopt(opts, "linux.oss_version", &oss_version, sizeof(oss_version)); + if (error != 0) + LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error); if (error == ENOENT) gotversion = 0; else { @@ -346,12 +517,15 @@ linux_prison_set(void *obj, void *data) error = linux_alloc_prison(pr, &lpr); if (error) { mtx_unlock(&pr->pr_mtx); + LIN_SDT_PROBE1(mib, linux_prison_set, return, error); return (error); } if (osrelease) { error = linux_map_osrel(osrelease, &lpr->pr_osrel); if (error) { mtx_unlock(&pr->pr_mtx); + LIN_SDT_PROBE1(mib, linux_prison_set, return, + error); return (error); } strlcpy(lpr->pr_osrelease, osrelease, @@ -363,6 +537,8 @@ linux_prison_set(void *obj, void *data) lpr->pr_oss_version = oss_version; mtx_unlock(&pr->pr_mtx); } + + LIN_SDT_PROBE1(mib, linux_prison_set, return, 0); return (0); } 
@@ -385,43 +561,74 @@ linux_prison_get(void *obj, void *data) static int version0; + LIN_SDT_PROBE2(mib, linux_prison_get, entry, obj, data); + /* See if this prison is the one with the Linux info. */ lpr = linux_find_prison(pr, &ppr); i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "linux", &i, sizeof(i)); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, error); + if (error != ENOENT) + goto done; + } if (i) { error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, + error); + if (error != ENOENT) + goto done; + } error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, + error); + if (error != ENOENT) + goto done; + } error = vfs_setopt(opts, "linux.oss_version", &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, + error); + if(error != ENOENT) + goto done; + } } else { /* * If this prison is inheriting its Linux info, report * empty/zero parameters. 
*/ error = vfs_setopts(opts, "linux.osname", ""); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, + error); + if(error != ENOENT) + goto done; + } error = vfs_setopts(opts, "linux.osrelease", ""); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, + error); + if(error != ENOENT) + goto done; + } error = vfs_setopt(opts, "linux.oss_version", &version0, sizeof(lpr->pr_oss_version)); - if (error != 0 && error != ENOENT) - goto done; + if (error != 0) { + LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, + error); + if(error != ENOENT) + goto done; + } } error = 0; done: mtx_unlock(&ppr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_prison_get, return, error); return (error); } @@ -429,7 +636,9 @@ static void linux_prison_destructor(void *data) { + LIN_SDT_PROBE1(mib, linux_prison_destructor, entry, data); free(data, M_PRISON); + LIN_SDT_PROBE0(mib, linux_prison_destructor, return); } void @@ -443,6 +652,8 @@ linux_osd_jail_register(void) [PR_METHOD_CHECK] = linux_prison_check }; + LIN_SDT_PROBE0(mib, linux_osd_jail_register, entry); + linux_osd_jail_slot = osd_jail_register(linux_prison_destructor, methods); if (linux_osd_jail_slot > 0) { @@ -452,14 +663,20 @@ linux_osd_jail_register(void) (void)linux_alloc_prison(pr, NULL); sx_xunlock(&allprison_lock); } + + LIN_SDT_PROBE0(mib, linux_osd_jail_register, return); } void linux_osd_jail_deregister(void) { + LIN_SDT_PROBE0(mib, linux_osd_jail_deregister, entry); + if (linux_osd_jail_slot) osd_jail_deregister(linux_osd_jail_slot); + + LIN_SDT_PROBE0(mib, linux_osd_jail_deregister, return); } void @@ -468,9 +685,13 @@ linux_get_osname(struct thread *td, char *dst) struct prison *pr; struct linux_prison *lpr; + LIN_SDT_PROBE2(mib, linux_get_osname, entry, td, dst); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 
mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE0(mib, linux_get_osname, return); } static int @@ -479,9 +700,13 @@ linux_set_osname(struct thread *td, char *osname) struct prison *pr; struct linux_prison *lpr; + LIN_SDT_PROBE2(mib, linux_set_osname, entry, td, osname); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_set_osname, return, 0); return (0); } @@ -491,9 +716,13 @@ linux_get_osrelease(struct thread *td, char *dst) struct prison *pr; struct linux_prison *lpr; + LIN_SDT_PROBE2(mib, linux_get_osrelease, entry, td, dst); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE0(mib, linux_get_osrelease, return); } int @@ -503,9 +732,13 @@ linux_kernver(struct thread *td) struct linux_prison *lpr; int osrel; + LIN_SDT_PROBE1(mib, linux_kernver, entry, td); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); osrel = lpr->pr_osrel; mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_kernver, return, osrel); return (osrel); } @@ -516,11 +749,15 @@ linux_set_osrelease(struct thread *td, char *osrelease) struct linux_prison *lpr; int error; + LIN_SDT_PROBE2(mib, linux_set_osrelease, entry, td, osrelease); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); error = linux_map_osrel(osrelease, &lpr->pr_osrel); if (error == 0) strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_set_osrelease, return, error); return (error); } @@ -531,9 +768,13 @@ linux_get_oss_version(struct thread *td) struct linux_prison *lpr; int version; + LIN_SDT_PROBE1(mib, linux_get_oss_version, entry, td); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); version = lpr->pr_oss_version; mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_get_oss_version, return, version); return (version); } @@ -543,13 +784,18 @@ 
linux_set_oss_version(struct thread *td, int oss_version) struct prison *pr; struct linux_prison *lpr; + LIN_SDT_PROBE2(mib, linux_set_oss_version, entry, td, oss_version); + lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); lpr->pr_oss_version = oss_version; mtx_unlock(&pr->pr_mtx); + + LIN_SDT_PROBE1(mib, linux_set_oss_version, return, 0); return (0); } #if defined(DEBUG) || defined(KTR) +/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index d8ce2a870cb1..4365b10bdabe 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include @@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -83,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include #endif +#include #include #include #include @@ -91,6 +94,17 @@ __FBSDID("$FreeBSD$"); #include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/* Linuxulator-global DTrace probes */ +LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); +LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); +LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked); +LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock); +LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked); +LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock); + int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { diff --git a/sys/compat/linux/linux_sysctl.c b/sys/compat/linux/linux_sysctl.c index 9111dbec7805..b2c10e1ccc1c 100644 --- a/sys/compat/linux/linux_sysctl.c +++ b/sys/compat/linux/linux_sysctl.c @@ -30,12 +30,15 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include +#include #include #include #include #include 
+#include #include #include #include @@ -48,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #endif +#include #include #define LINUX_CTL_KERN 1 @@ -65,23 +69,49 @@ __FBSDID("$FreeBSD$"); #define LINUX_KERN_OSREV 3 #define LINUX_KERN_VERSION 4 +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/** + * DTrace probes in this module. + */ +LIN_SDT_PROBE_DEFINE2(sysctl, handle_string, entry, "struct l___sysctl_args *", + "char *"); +LIN_SDT_PROBE_DEFINE1(sysctl, handle_string, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(sysctl, handle_string, return, "int"); +LIN_SDT_PROBE_DEFINE2(sysctl, linux_sysctl, entry, "struct l___sysctl_args *", + "struct thread *"); +LIN_SDT_PROBE_DEFINE1(sysctl, linux_sysctl, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE2(sysctl, linux_sysctl, wrong_length, "int", "int"); +LIN_SDT_PROBE_DEFINE1(sysctl, linux_sysctl, unsupported_sysctl, "char *"); +LIN_SDT_PROBE_DEFINE1(sysctl, linux_sysctl, return, "int"); + static int handle_string(struct l___sysctl_args *la, char *value) { int error; + LIN_SDT_PROBE2(sysctl, handle_string, entry, la, value); + if (la->oldval != 0) { l_int len = strlen(value); error = copyout(value, PTRIN(la->oldval), len + 1); if (!error && la->oldlenp != 0) error = copyout(&len, PTRIN(la->oldlenp), sizeof(len)); - if (error) + if (error) { + LIN_SDT_PROBE1(sysctl, handle_string, copyout_error, + error); + LIN_SDT_PROBE1(sysctl, handle_string, return, error); return (error); + } } - if (la->newval != 0) + if (la->newval != 0) { + LIN_SDT_PROBE1(sysctl, handle_string, return, ENOTDIR); return (ENOTDIR); + } + LIN_SDT_PROBE1(sysctl, handle_string, return, 0); return (0); } @@ -91,18 +121,30 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) struct l___sysctl_args la; struct sbuf *sb; l_int *mib; + char *sysctl_string; int error, i; - error = copyin(args->args, &la, sizeof(la)); - if (error) - return (error); + LIN_SDT_PROBE2(sysctl, linux_sysctl, entry, args->args, td); - if (la.nlen <= 0 || la.nlen > 
LINUX_CTL_MAXNAME) + error = copyin(args->args, &la, sizeof(la)); + if (error) { + LIN_SDT_PROBE1(sysctl, linux_sysctl, copyin_error, error); + LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error); + return (error); + } + + if (la.nlen <= 0 || la.nlen > LINUX_CTL_MAXNAME) { + LIN_SDT_PROBE2(sysctl, linux_sysctl, wrong_length, la.nlen, + LINUX_CTL_MAXNAME); + LIN_SDT_PROBE1(sysctl, linux_sysctl, return, ENOTDIR); return (ENOTDIR); + } mib = malloc(la.nlen * sizeof(l_int), M_TEMP, M_WAITOK); error = copyin(PTRIN(la.name), mib, la.nlen * sizeof(l_int)); if (error) { + LIN_SDT_PROBE1(sysctl, linux_sysctl, copyin_error, error); + LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error); free(mib, M_TEMP); return (error); } @@ -116,6 +158,7 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) case LINUX_KERN_VERSION: error = handle_string(&la, version); free(mib, M_TEMP); + LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error); return (error); default: break; @@ -128,16 +171,23 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) sb = sbuf_new(NULL, NULL, 20 + la.nlen * 5, SBUF_AUTOEXTEND); if (sb == NULL) { linux_msg(td, "sysctl is not implemented"); + LIN_SDT_PROBE1(sysctl, linux_sysctl, unsupported_sysctl, + "unknown sysctl, ENOMEM during lookup"); } else { sbuf_printf(sb, "sysctl "); for (i = 0; i < la.nlen; i++) sbuf_printf(sb, "%c%d", (i) ? 
',' : '{', mib[i]); sbuf_printf(sb, "} is not implemented"); sbuf_finish(sb); + sysctl_string = sbuf_data(sb); linux_msg(td, "%s", sbuf_data(sb)); + LIN_SDT_PROBE1(sysctl, linux_sysctl, unsupported_sysctl, + sysctl_string); sbuf_delete(sb); } free(mib, M_TEMP); + + LIN_SDT_PROBE1(sysctl, linux_sysctl, return, ENOTDIR); return (ENOTDIR); } diff --git a/sys/compat/linux/linux_time.c b/sys/compat/linux/linux_time.c index 8800d674c319..135902520767 100644 --- a/sys/compat/linux/linux_time.c +++ b/sys/compat/linux/linux_time.c @@ -36,10 +36,13 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp #endif #include "opt_compat.h" +#include "opt_kdtrace.h" #include +#include #include #include +#include #include #include #include @@ -56,6 +59,63 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp #include #endif +#include + +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/** + * DTrace probes in this module. + */ +LIN_SDT_PROBE_DEFINE2(time, native_to_linux_timespec, entry, + "struct l_timespec *", "struct timespec *"); +LIN_SDT_PROBE_DEFINE0(time, native_to_linux_timespec, return); +LIN_SDT_PROBE_DEFINE2(time, linux_to_native_timespec, entry, + "struct timespec *", "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE1(time, linux_to_native_timespec, return, "int"); +LIN_SDT_PROBE_DEFINE2(time, linux_to_native_clockid, entry, "clockid_t *", + "clockid_t"); +LIN_SDT_PROBE_DEFINE1(time, linux_to_native_clockid, unsupported_clockid, + "clockid_t"); +LIN_SDT_PROBE_DEFINE1(time, linux_to_native_clockid, unknown_clockid, + "clockid_t"); +LIN_SDT_PROBE_DEFINE1(time, linux_to_native_clockid, return, "int"); +LIN_SDT_PROBE_DEFINE2(time, linux_clock_gettime, entry, "clockid_t", + "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_gettime, conversion_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_gettime, gettime_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_gettime, copyout_error, 
"int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_gettime, return, "int"); +LIN_SDT_PROBE_DEFINE2(time, linux_clock_settime, entry, "clockid_t", + "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_settime, conversion_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_settime, settime_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_settime, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_settime, return, "int"); +LIN_SDT_PROBE_DEFINE2(time, linux_clock_getres, entry, "clockid_t", + "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE0(time, linux_clock_getres, nullcall); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, conversion_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, getres_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, return, "int"); +LIN_SDT_PROBE_DEFINE2(time, linux_nanosleep, entry, "const struct l_timespec *", + "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, conversion_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, nanosleep_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, return, "int"); +LIN_SDT_PROBE_DEFINE4(time, linux_clock_nanosleep, entry, "clockid_t", "int", + "struct l_timespec *", "struct l_timespec *"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, conversion_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, nanosleep_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_flags, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_clockid, "int"); +LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, return, 
"int"); + static void native_to_linux_timespec(struct l_timespec *, struct timespec *); static int linux_to_native_timespec(struct timespec *, @@ -65,24 +125,38 @@ static int linux_to_native_clockid(clockid_t *, clockid_t); static void native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp) { + + LIN_SDT_PROBE2(time, native_to_linux_timespec, entry, ltp, ntp); + ltp->tv_sec = ntp->tv_sec; ltp->tv_nsec = ntp->tv_nsec; + + LIN_SDT_PROBE0(time, native_to_linux_timespec, return); } static int linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp) { - if (ltp->tv_sec < 0 || ltp->tv_nsec > (l_long)999999999L) + + LIN_SDT_PROBE2(time, linux_to_native_timespec, entry, ntp, ltp); + + if (ltp->tv_sec < 0 || ltp->tv_nsec > (l_long)999999999L) { + LIN_SDT_PROBE1(time, linux_to_native_timespec, return, EINVAL); return (EINVAL); + } ntp->tv_sec = ltp->tv_sec; ntp->tv_nsec = ltp->tv_nsec; + LIN_SDT_PROBE1(time, linux_to_native_timespec, return, 0); return (0); } static int linux_to_native_clockid(clockid_t *n, clockid_t l) { + + LIN_SDT_PROBE2(time, linux_to_native_clockid, entry, n, l); + switch (l) { case LINUX_CLOCK_REALTIME: *n = CLOCK_REALTIME; @@ -94,11 +168,20 @@ linux_to_native_clockid(clockid_t *n, clockid_t l) case LINUX_CLOCK_THREAD_CPUTIME_ID: case LINUX_CLOCK_REALTIME_HR: case LINUX_CLOCK_MONOTONIC_HR: + LIN_SDT_PROBE1(time, linux_to_native_clockid, + unsupported_clockid, l); + LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL); + return (EINVAL); + break; default: + LIN_SDT_PROBE1(time, linux_to_native_clockid, + unknown_clockid, l); + LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL); return (EINVAL); break; } + LIN_SDT_PROBE1(time, linux_to_native_clockid, return, 0); return (0); } @@ -110,15 +193,29 @@ linux_clock_gettime(struct thread *td, struct linux_clock_gettime_args *args) clockid_t nwhich = 0; /* XXX: GCC */ struct timespec tp; + LIN_SDT_PROBE2(time, linux_clock_gettime, entry, args->which, args->tp); + 
error = linux_to_native_clockid(&nwhich, args->which); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_gettime, conversion_error, + error); + LIN_SDT_PROBE1(time, linux_clock_gettime, return, error); return (error); + } error = kern_clock_gettime(td, nwhich, &tp); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_gettime, gettime_error, error); + LIN_SDT_PROBE1(time, linux_clock_gettime, return, error); return (error); + } native_to_linux_timespec(&lts, &tp); - return (copyout(&lts, args->tp, sizeof lts)); + error = copyout(&lts, args->tp, sizeof lts); + if (error != 0) + LIN_SDT_PROBE1(time, linux_clock_gettime, copyout_error, error); + + LIN_SDT_PROBE1(time, linux_clock_gettime, return, error); + return (error); } int @@ -129,17 +226,35 @@ linux_clock_settime(struct thread *td, struct linux_clock_settime_args *args) int error; clockid_t nwhich = 0; /* XXX: GCC */ - error = linux_to_native_clockid(&nwhich, args->which); - if (error != 0) - return (error); - error = copyin(args->tp, &lts, sizeof lts); - if (error != 0) - return (error); - error = linux_to_native_timespec(&ts, &lts); - if (error != 0) - return (error); + LIN_SDT_PROBE2(time, linux_clock_settime, entry, args->which, args->tp); - return (kern_clock_settime(td, nwhich, &ts)); + error = linux_to_native_clockid(&nwhich, args->which); + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_settime, conversion_error, + error); + LIN_SDT_PROBE1(time, linux_clock_settime, return, error); + return (error); + } + error = copyin(args->tp, &lts, sizeof lts); + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_settime, copyin_error, error); + LIN_SDT_PROBE1(time, linux_clock_settime, return, error); + return (error); + } + error = linux_to_native_timespec(&ts, &lts); + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_settime, conversion_error, + error); + LIN_SDT_PROBE1(time, linux_clock_settime, return, error); + return (error); + } + + error = kern_clock_settime(td, 
nwhich, &ts); + if (error != 0) + LIN_SDT_PROBE1(time, linux_clock_settime, settime_error, error); + + LIN_SDT_PROBE1(time, linux_clock_settime, return, error); + return (error); } int @@ -150,18 +265,35 @@ linux_clock_getres(struct thread *td, struct linux_clock_getres_args *args) int error; clockid_t nwhich = 0; /* XXX: GCC */ - if (args->tp == NULL) + LIN_SDT_PROBE2(time, linux_clock_getres, entry, args->which, args->tp); + + if (args->tp == NULL) { + LIN_SDT_PROBE0(time, linux_clock_getres, nullcall); + LIN_SDT_PROBE1(time, linux_clock_getres, return, 0); return (0); + } error = linux_to_native_clockid(&nwhich, args->which); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_getres, conversion_error, + error); + LIN_SDT_PROBE1(time, linux_clock_getres, return, error); return (error); + } error = kern_clock_getres(td, nwhich, &ts); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_getres, getres_error, error); + LIN_SDT_PROBE1(time, linux_clock_getres, return, error); return (error); + } native_to_linux_timespec(&lts, &ts); - return (copyout(&lts, args->tp, sizeof lts)); + error = copyout(&lts, args->tp, sizeof lts); + if (error != 0) + LIN_SDT_PROBE1(time, linux_clock_getres, copyout_error, error); + + LIN_SDT_PROBE1(time, linux_clock_getres, return, error); + return (error); } int @@ -172,9 +304,14 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args) struct timespec rqts, rmts; int error; + LIN_SDT_PROBE2(time, linux_nanosleep, entry, args->rqtp, args->rmtp); + error = copyin(args->rqtp, &lrqts, sizeof lrqts); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_nanosleep, copyin_error, error); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error); return (error); + } if (args->rmtp != NULL) rmtp = &rmts; @@ -182,19 +319,30 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args) rmtp = NULL; error = linux_to_native_timespec(&rqts, &lrqts); - if (error != 0) + if (error 
!= 0) { + LIN_SDT_PROBE1(time, linux_nanosleep, conversion_error, error); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error); return (error); + } error = kern_nanosleep(td, &rqts, rmtp); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_nanosleep, nanosleep_error, error); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error); return (error); + } if (args->rmtp != NULL) { native_to_linux_timespec(&lrmts, rmtp); error = copyout(&lrmts, args->rmtp, sizeof(lrmts)); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_nanosleep, copyout_error, + error); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error); return (error); + } } + LIN_SDT_PROBE1(time, linux_nanosleep, return, 0); return (0); } @@ -206,15 +354,31 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args struct timespec rqts, rmts; int error; - if (args->flags != 0) - return (EINVAL); /* XXX deal with TIMER_ABSTIME */ + LIN_SDT_PROBE4(time, linux_clock_nanosleep, entry, args->which, + args->flags, args->rqtp, args->rmtp); - if (args->which != LINUX_CLOCK_REALTIME) + if (args->flags != 0) { + /* XXX deal with TIMER_ABSTIME */ + LIN_SDT_PROBE1(time, linux_clock_nanosleep, unsupported_flags, + args->flags); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, EINVAL); + return (EINVAL); /* XXX deal with TIMER_ABSTIME */ + } + + if (args->which != LINUX_CLOCK_REALTIME) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, unsupported_clockid, + args->which); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, EINVAL); return (EINVAL); + } error = copyin(args->rqtp, &lrqts, sizeof lrqts); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, copyin_error, + error); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); return (error); + } if (args->rmtp != NULL) rmtp = &rmts; @@ -222,18 +386,31 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args rmtp = NULL; error = 
linux_to_native_timespec(&rqts, &lrqts); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, conversion_error, + error); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); return (error); + } error = kern_nanosleep(td, &rqts, rmtp); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, nanosleep_error, + error); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); return (error); + } if (args->rmtp != NULL) { native_to_linux_timespec(&lrmts, rmtp); error = copyout(&lrmts, args->rmtp, sizeof lrmts ); - if (error != 0) + if (error != 0) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, + copyout_error, error); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); return (error); + } } + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, 0); return (0); } diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c index 31950ffb2286..b66fb5c1c25a 100644 --- a/sys/compat/linux/linux_uid16.c +++ b/sys/compat/linux/linux_uid16.c @@ -28,14 +28,17 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -48,8 +51,53 @@ __FBSDID("$FreeBSD$"); #include #endif +#include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/** + * DTrace probes in this module. 
+ */ +LIN_SDT_PROBE_DEFINE3(uid16, linux_chown16, entry, "char *", "l_uid16_t", + "l_gid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_chown16, conv_path, "char *"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_chown16, return, "int"); +LIN_SDT_PROBE_DEFINE3(uid16, linux_lchown16, entry, "char *", "l_uid16_t", + "l_gid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_lchown16, conv_path, "char *"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_lchown16, return, "int"); +LIN_SDT_PROBE_DEFINE2(uid16, linux_setgroups16, entry, "l_uint", "l_gid16_t *"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, copyin_error, "int"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, priv_check_cred_error, "int"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, return, "int"); +LIN_SDT_PROBE_DEFINE2(uid16, linux_getgroups16, entry, "l_uint", "l_gid16_t *"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_getgroups16, copyout_error, "int"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_getgroups16, return, "int"); +LIN_SDT_PROBE_DEFINE0(uid16, linux_getgid16, entry); +LIN_SDT_PROBE_DEFINE1(uid16, linux_getgid16, return, "int"); +LIN_SDT_PROBE_DEFINE0(uid16, linux_getuid16, entry); +LIN_SDT_PROBE_DEFINE1(uid16, linux_getuid16, return, "int"); +LIN_SDT_PROBE_DEFINE0(uid16, linux_getegid16, entry); +LIN_SDT_PROBE_DEFINE1(uid16, linux_getegid16, return, "int"); +LIN_SDT_PROBE_DEFINE0(uid16, linux_geteuid16, entry); +LIN_SDT_PROBE_DEFINE1(uid16, linux_geteuid16, return, "int"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setgid16, entry, "l_gid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setgid16, return, "int"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setuid16, entry, "l_uid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setuid16, return, "int"); +LIN_SDT_PROBE_DEFINE2(uid16, linux_setregid16, entry, "l_gid16_t", "l_gid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setregid16, return, "int"); +LIN_SDT_PROBE_DEFINE2(uid16, linux_setreuid16, entry, "l_uid16_t", "l_uid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setreuid16, return, "int"); 
+LIN_SDT_PROBE_DEFINE3(uid16, linux_setresgid16, entry, "l_gid16_t", "l_gid16_t", + "l_gid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setresgid16, return, "int"); +LIN_SDT_PROBE_DEFINE3(uid16, linux_setresuid16, entry, "l_uid16_t", "l_uid16_t", + "l_uid16_t"); +LIN_SDT_PROBE_DEFINE1(uid16, linux_setresuid16, return, "int"); + DUMMY(setfsuid16); DUMMY(setfsgid16); DUMMY(getresuid16); @@ -65,13 +113,20 @@ linux_chown16(struct thread *td, struct linux_chown16_args *args) LCONVPATHEXIST(td, args->path, &path); -#ifdef DEBUG - if (ldebug(chown16)) - printf(ARGS(chown16, "%s, %d, %d"), path, args->uid, args->gid); -#endif + /* + * The DTrace probes have to be after the LCONVPATHEXIST, as + * LCONVPATHEXIST may return on its own and we do not want to + * have a stray entry without the corresponding return. + */ + LIN_SDT_PROBE3(uid16, linux_chown16, entry, args->path, args->uid, + args->gid); + LIN_SDT_PROBE1(uid16, linux_chown16, conv_path, path); + error = kern_chown(td, path, UIO_SYSSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid)); LFREEPATH(path); + + LIN_SDT_PROBE1(uid16, linux_chown16, return, error); return (error); } @@ -83,14 +138,20 @@ linux_lchown16(struct thread *td, struct linux_lchown16_args *args) LCONVPATHEXIST(td, args->path, &path); -#ifdef DEBUG - if (ldebug(lchown16)) - printf(ARGS(lchown16, "%s, %d, %d"), path, args->uid, - args->gid); -#endif + /* + * The DTrace probes have to be after the LCONVPATHEXIST, as + * LCONVPATHEXIST may return on its own and we do not want to + * have a stray entry without the corresponding return. 
+ */ + LIN_SDT_PROBE3(uid16, linux_lchown16, entry, args->path, args->uid, + args->gid); + LIN_SDT_PROBE1(uid16, linux_lchown16, conv_path, path); + error = kern_lchown(td, path, UIO_SYSSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid)); LFREEPATH(path); + + LIN_SDT_PROBE1(uid16, linux_lchown16, return, error); return (error); } @@ -103,17 +164,19 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) int ngrp, error; struct proc *p; -#ifdef DEBUG - if (ldebug(setgroups16)) - printf(ARGS(setgroups16, "%d, *"), args->gidsetsize); -#endif + LIN_SDT_PROBE2(uid16, linux_setgroups16, entry, args->gidsetsize, + args->gidset); ngrp = args->gidsetsize; - if (ngrp < 0 || ngrp >= ngroups_max + 1) + if (ngrp < 0 || ngrp >= ngroups_max + 1) { + LIN_SDT_PROBE1(uid16, linux_setgroups16, return, EINVAL); return (EINVAL); + } linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t)); if (error) { + LIN_SDT_PROBE1(uid16, linux_setgroups16, copyin_error, error); + LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); free(linux_gidset, M_TEMP); return (error); } @@ -131,6 +194,9 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { PROC_UNLOCK(p); crfree(newcred); + + LIN_SDT_PROBE1(uid16, linux_setgroups16, priv_check_cred_error, + error); goto out; } @@ -154,6 +220,8 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) error = 0; out: free(linux_gidset, M_TEMP); + + LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); return (error); } @@ -165,10 +233,8 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; -#ifdef DEBUG - if (ldebug(getgroups16)) - printf(ARGS(getgroups16, "%d, *"), args->gidsetsize); -#endif + LIN_SDT_PROBE2(uid16, linux_getgroups16, entry, args->gidsetsize, + 
args->gidset); cred = td->td_ucred; bsd_gidset = cred->cr_groups; @@ -182,11 +248,15 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; + + LIN_SDT_PROBE1(uid16, linux_getgroups16, return, 0); return (0); } - if (ngrp < bsd_gidsetsz) + if (ngrp < bsd_gidsetsz) { + LIN_SDT_PROBE1(uid16, linux_getgroups16, return, EINVAL); return (EINVAL); + } ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), @@ -198,10 +268,15 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) error = copyout(linux_gidset, args->gidset, ngrp * sizeof(l_gid16_t)); free(linux_gidset, M_TEMP); - if (error) + if (error) { + LIN_SDT_PROBE1(uid16, linux_getgroups16, copyout_error, error); + LIN_SDT_PROBE1(uid16, linux_getgroups16, return, error); return (error); + } td->td_retval[0] = ngrp; + + LIN_SDT_PROBE1(uid16, linux_getgroups16, return, 0); return (0); } @@ -219,7 +294,11 @@ int linux_getgid16(struct thread *td, struct linux_getgid16_args *args) { + LIN_SDT_PROBE0(uid16, linux_getgid16, entry); + td->td_retval[0] = td->td_ucred->cr_rgid; + + LIN_SDT_PROBE1(uid16, linux_getgid16, return, 0); return (0); } @@ -227,7 +306,11 @@ int linux_getuid16(struct thread *td, struct linux_getuid16_args *args) { + LIN_SDT_PROBE0(uid16, linux_getuid16, entry); + td->td_retval[0] = td->td_ucred->cr_ruid; + + LIN_SDT_PROBE1(uid16, linux_getuid16, return, 0); return (0); } @@ -235,74 +318,124 @@ int linux_getegid16(struct thread *td, struct linux_getegid16_args *args) { struct getegid_args bsd; + int error; - return (sys_getegid(td, &bsd)); + LIN_SDT_PROBE0(uid16, linux_getegid16, entry); + + error = sys_getegid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_getegid16, return, error); + return (error); } int linux_geteuid16(struct thread *td, struct linux_geteuid16_args *args) { struct geteuid_args bsd; + int error; - return (sys_geteuid(td, &bsd)); + LIN_SDT_PROBE0(uid16, 
linux_geteuid16, entry); + + error = sys_geteuid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_geteuid16, return, error); + return (error); } int linux_setgid16(struct thread *td, struct linux_setgid16_args *args) { struct setgid_args bsd; + int error; + + LIN_SDT_PROBE1(uid16, linux_setgid16, entry, args->gid); bsd.gid = args->gid; - return (sys_setgid(td, &bsd)); + error = sys_setgid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setgid16, return, error); + return (error); } int linux_setuid16(struct thread *td, struct linux_setuid16_args *args) { struct setuid_args bsd; + int error; + + LIN_SDT_PROBE1(uid16, linux_setuid16, entry, args->uid); bsd.uid = args->uid; - return (sys_setuid(td, &bsd)); + error = sys_setuid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setuid16, return, error); + return (error); } int linux_setregid16(struct thread *td, struct linux_setregid16_args *args) { struct setregid_args bsd; + int error; + + LIN_SDT_PROBE2(uid16, linux_setregid16, entry, args->rgid, args->egid); bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); - return (sys_setregid(td, &bsd)); + error = sys_setregid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setregid16, return, error); + return (error); } int linux_setreuid16(struct thread *td, struct linux_setreuid16_args *args) { struct setreuid_args bsd; + int error; + + LIN_SDT_PROBE2(uid16, linux_setreuid16, entry, args->ruid, args->euid); bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); - return (sys_setreuid(td, &bsd)); + error = sys_setreuid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setreuid16, return, error); + return (error); } int linux_setresgid16(struct thread *td, struct linux_setresgid16_args *args) { struct setresgid_args bsd; + int error; + + LIN_SDT_PROBE3(uid16, linux_setresgid16, entry, args->rgid, args->egid, + args->sgid); bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); bsd.sgid = CAST_NOCHG(args->sgid); - return (sys_setresgid(td, &bsd)); + error = 
sys_setresgid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setresgid16, return, error); + return (error); } int linux_setresuid16(struct thread *td, struct linux_setresuid16_args *args) { struct setresuid_args bsd; + int error; + + LIN_SDT_PROBE3(uid16, linux_setresuid16, entry, args->ruid, args->euid, + args->suid); bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); bsd.suid = CAST_NOCHG(args->suid); - return (sys_setresuid(td, &bsd)); + error = sys_setresuid(td, &bsd); + + LIN_SDT_PROBE1(uid16, linux_setresuid16, return, error); + return (error); } diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c index 3c26f88d6983..76c210c591fe 100644 --- a/sys/compat/linux/linux_util.c +++ b/sys/compat/linux/linux_util.c @@ -33,16 +33,19 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include #include #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -56,8 +59,42 @@ __FBSDID("$FreeBSD$"); #include #endif +#include + const char linux_emul_path[] = "/compat/linux"; +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + +/** + * DTrace probes in this module. 
+ */ +LIN_SDT_PROBE_DEFINE5(util, linux_emul_convpath, entry, "const char *", + "enum uio_seg", "char **", "int", "int"); +LIN_SDT_PROBE_DEFINE1(util, linux_emul_convpath, return, "int"); +LIN_SDT_PROBE_DEFINE1(util, linux_msg, entry, "const char *"); +LIN_SDT_PROBE_DEFINE0(util, linux_msg, return); +LIN_SDT_PROBE_DEFINE2(util, linux_driver_get_name_dev, entry, "device_t", + "const char *"); +LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_name_dev, nullcall); +LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_name_dev, return, "char *"); +LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, entry, "char *", + "int *", "int *"); +LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_major_minor, nullcall); +LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_major_minor, notfound, "char *"); +LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, return, "int", + "int", "int"); +LIN_SDT_PROBE_DEFINE0(util, linux_get_char_devices, entry); +LIN_SDT_PROBE_DEFINE1(util, linux_get_char_devices, return, "char *"); +LIN_SDT_PROBE_DEFINE1(util, linux_free_get_char_devices, entry, "char *"); +LIN_SDT_PROBE_DEFINE0(util, linux_free_get_char_devices, return); +LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, entry, + "struct linux_device_handler *"); +LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, return, "int"); +LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, entry, + "struct linux_device_handler *"); +LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, return, "int"); + /* * Search an alternate path before passing pathname arguments on to * system calls. Useful for keeping a separate 'emulation tree'. @@ -66,17 +103,19 @@ const char linux_emul_path[] = "/compat/linux"; * named file, i.e. we check if the directory it should be in exists. 
*/ int -linux_emul_convpath(td, path, pathseg, pbuf, cflag, dfd) - struct thread *td; - const char *path; - enum uio_seg pathseg; - char **pbuf; - int cflag; - int dfd; +linux_emul_convpath(struct thread *td, const char *path, enum uio_seg pathseg, + char **pbuf, int cflag, int dfd) { + int retval; - return (kern_alternate_path(td, linux_emul_path, path, pathseg, pbuf, - cflag, dfd)); + LIN_SDT_PROBE5(util, linux_emul_convpath, entry, path, pathseg, pbuf, + cflag, dfd); + + retval = kern_alternate_path(td, linux_emul_path, path, pathseg, pbuf, + cflag, dfd); + + LIN_SDT_PROBE1(util, linux_emul_convpath, return, retval); + return (retval); } void @@ -85,12 +124,16 @@ linux_msg(const struct thread *td, const char *fmt, ...) va_list ap; struct proc *p; + LIN_SDT_PROBE1(util, linux_msg, entry, fmt); + p = td->td_proc; printf("linux: pid %d (%s): ", (int)p->p_pid, p->p_comm); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); + + LIN_SDT_PROBE0(util, linux_msg, return); } struct device_element @@ -113,13 +156,23 @@ linux_driver_get_name_dev(device_t dev) struct device_element *de; const char *device_name = device_get_name(dev); - if (device_name == NULL) + LIN_SDT_PROBE2(util, linux_driver_get_name_dev, entry, dev, + device_name); + + if (device_name == NULL) { + LIN_SDT_PROBE0(util, linux_driver_get_name_dev, nullcall); + LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL); return NULL; + } TAILQ_FOREACH(de, &devices, list) { - if (strcmp(device_name, de->entry.bsd_driver_name) == 0) + if (strcmp(device_name, de->entry.bsd_driver_name) == 0) { + LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, + de->entry.linux_driver_name); return (de->entry.linux_driver_name); + } } + LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL); return NULL; } @@ -128,8 +181,15 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor) { struct device_element *de; - if (node == NULL || major == NULL || minor == NULL) + LIN_SDT_PROBE3(util, 
linux_driver_get_major_minor, entry, node, major, + minor); + + if (node == NULL || major == NULL || minor == NULL) { + LIN_SDT_PROBE0(util, linux_driver_get_major_minor, nullcall); + LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1, + 0, 0); return 1; + } if (strlen(node) > strlen("pts/") && strncmp(node, "pts/", strlen("pts/")) == 0) { @@ -143,6 +203,9 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor) devno = strtoul(node + strlen("pts/"), NULL, 10); *major = 136 + (devno / 256); *minor = devno % 256; + + LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 0, + *major, *minor); return 0; } @@ -150,10 +213,15 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor) if (strcmp(node, de->entry.bsd_device_name) == 0) { *major = de->entry.linux_major; *minor = de->entry.linux_minor; + + LIN_SDT_PROBE3(util, linux_driver_get_major_minor, + return, 0, *major, *minor); return 0; } } + LIN_SDT_PROBE1(util, linux_driver_get_major_minor, notfound, node); + LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1, 0, 0); return 1; } @@ -165,6 +233,8 @@ linux_get_char_devices() char formated[256]; int current_size = 0, string_size = 1024; + LIN_SDT_PROBE0(util, linux_get_char_devices, entry); + string = malloc(string_size, M_LINUX, M_WAITOK); string[0] = '\000'; last = ""; @@ -191,13 +261,19 @@ linux_get_char_devices() } } + LIN_SDT_PROBE1(util, linux_get_char_devices, return, string); return string; } void linux_free_get_char_devices(char *string) { + + LIN_SDT_PROBE1(util, linux_free_get_char_devices, entry, string); + free(string, M_LINUX); + + LIN_SDT_PROBE0(util, linux_free_get_char_devices, return); } static int linux_major_starting = 200; @@ -207,11 +283,15 @@ linux_device_register_handler(struct linux_device_handler *d) { struct device_element *de; - if (d == NULL) - return (EINVAL); + LIN_SDT_PROBE1(util, linux_device_register_handler, entry, d); - de = malloc(sizeof(*de), - M_LINUX, M_WAITOK); + if (d == NULL) { 
+ LIN_SDT_PROBE1(util, linux_device_register_handler, return, + EINVAL); + return (EINVAL); + } + + de = malloc(sizeof(*de), M_LINUX, M_WAITOK); if (d->linux_major < 0) { d->linux_major = linux_major_starting++; } @@ -220,6 +300,7 @@ linux_device_register_handler(struct linux_device_handler *d) /* Add the element to the list, sorted on span. */ TAILQ_INSERT_TAIL(&devices, de, list); + LIN_SDT_PROBE1(util, linux_device_register_handler, return, 0); return (0); } @@ -228,16 +309,25 @@ linux_device_unregister_handler(struct linux_device_handler *d) { struct device_element *de; - if (d == NULL) + LIN_SDT_PROBE1(util, linux_device_unregister_handler, entry, d); + + if (d == NULL) { + LIN_SDT_PROBE1(util, linux_device_unregister_handler, return, + EINVAL); return (EINVAL); + } TAILQ_FOREACH(de, &devices, list) { if (bcmp(d, &de->entry, sizeof(*d)) == 0) { TAILQ_REMOVE(&devices, de, list); free(de, M_LINUX); + + LIN_SDT_PROBE1(util, linux_device_unregister_handler, + return, 0); return (0); } } + LIN_SDT_PROBE1(util, linux_device_unregister_handler, return, EINVAL); return (EINVAL); } diff --git a/sys/compat/linux/linux_util.h b/sys/compat/linux/linux_util.h index 2908a0fe4e2a..6be0392f75ac 100644 --- a/sys/compat/linux/linux_util.h +++ b/sys/compat/linux/linux_util.h @@ -68,15 +68,23 @@ int linux_emul_convpath(struct thread *, const char *, enum uio_seg, char **, in #define LFREEPATH(path) free(path, M_TEMP) #define DUMMY(s) \ +LIN_SDT_PROBE_DEFINE0(dummy, s, entry); \ +LIN_SDT_PROBE_DEFINE0(dummy, s, not_implemented); \ +LIN_SDT_PROBE_DEFINE1(dummy, s, return, "int"); \ int \ linux_ ## s(struct thread *td, struct linux_ ## s ## _args *args) \ { \ static pid_t pid; \ \ + LIN_SDT_PROBE0(dummy, s, entry); \ + \ if (pid != td->td_proc->p_pid) { \ linux_msg(td, "syscall %s not implemented", #s); \ + LIN_SDT_PROBE0(dummy, s, not_implemented); \ pid = td->td_proc->p_pid; \ }; \ + \ + LIN_SDT_PROBE1(dummy, s, return, ENOSYS); \ return (ENOSYS); \ } \ struct __hack diff --git 
a/sys/compat/linux/stats_timing.d b/sys/compat/linux/stats_timing.d new file mode 100644 index 000000000000..d0b6f73c654c --- /dev/null +++ b/sys/compat/linux/stats_timing.d @@ -0,0 +1,94 @@ +#!/usr/sbin/dtrace -qs + +/*- + * Copyright (c) 2008-2012 Alexander Leidinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/** + * Some statistics (all per provider): + * - number of calls to a function per executable binary (not per PID!) 
+ * - allows to see where an optimization would be beneficial for a given + * application + * - graph of CPU time spent in functions per executable binary + * - together with the number of calls to this function this allows + * to determine if a kernel optimization would be beneficial / is + * possible for a given application + * - graph of longest running (CPU-time!) function in total + * - may help finding problem cases in the kernel code + * - timing statistics for the emul_lock + * - graph of longest held (CPU-time!) locks + */ + +#pragma D option dynvarsize=32m + +linuxulator*:::entry +{ + self->time[probefunc] = vtimestamp; + @calls[probeprov, execname, probefunc] = count(); +} + +linuxulator*:::return +/self->time[probefunc] != 0/ +{ + this->timediff = vtimestamp - self->time[probefunc]; + + @stats[probeprov, execname, probefunc] = quantize(this->timediff); + @longest[probeprov, probefunc] = max(this->timediff); + + self->time[probefunc] = 0; +} + +linuxulator*:::locked +{ + self->lock[arg0] = vtimestamp; +} + +linuxulator*:::unlock +/self->lock[arg0] != 0/ +{ + this->timediff = vtimestamp - self->lock[arg0]; + + @lockstats[probefunc] = quantize(this->timediff); + @longlock[probefunc] = max(this->timediff); + + self->lock[arg0] = 0; +} + +END +{ + printf("Number of calls per provider/application/kernel function:"); + printa(@calls); + printf("CPU-timing statistics per provider/application/kernel function (in ns):"); + printa(@stats); + printf("Longest running (CPU-time!) functions per provider (in ns):"); + printa(@longest); + printf("Lock CPU-timing statistics:"); + printa(@lockstats); + printf("Longest running (CPU-time!) 
locks:"); + printa(@longlock); +} + diff --git a/sys/compat/linux/trace_futexes.d b/sys/compat/linux/trace_futexes.d new file mode 100644 index 000000000000..bd9dac6b1c52 --- /dev/null +++ b/sys/compat/linux/trace_futexes.d @@ -0,0 +1,182 @@ +#!/usr/sbin/dtrace -qs + +/*- + * Copyright (c) 2011-2012 Alexander Leidinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +/** + * Trace futex operations: + * - internal locks + * - size of the futex list + * - report error conditions (emulation errors, kernel errors, + * programming errors) + * - execution time (wallclock) of futex related functions + */ + +#pragma D option specsize=32m + +/* Error conditions */ +linuxulator*:futex:futex_get:error, +linuxulator*:futex:futex_sleep:requeue_error, +linuxulator*:futex:futex_sleep:sleep_error, +linuxulator*:futex:futex_wait:copyin_error, +linuxulator*:futex:futex_wait:itimerfix_error, +linuxulator*:futex:futex_wait:sleep_error, +linuxulator*:futex:futex_atomic_op:missing_access_check, +linuxulator*:futex:futex_atomic_op:unimplemented_op, +linuxulator*:futex:futex_atomic_op:unimplemented_cmp, +linuxulator*:futex:linux_sys_futex:unimplemented_clockswitch, +linuxulator*:futex:linux_sys_futex:copyin_error, +linuxulator*:futex:linux_sys_futex:unhandled_efault, +linuxulator*:futex:linux_sys_futex:unimplemented_lock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_unlock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_trylock_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_wait_requeue_pi, +linuxulator*:futex:linux_sys_futex:unimplemented_cmp_requeue_pi, +linuxulator*:futex:linux_sys_futex:unknown_operation, +linuxulator*:futex:linux_get_robust_list:copyout_error, +linuxulator*:futex:handle_futex_death:copyin_error, +linuxulator*:futex:fetch_robust_entry:copyin_error, +linuxulator*:futex:release_futexes:copyin_error +{ + printf("ERROR: %s in %s:%s:%s\n", probename, probeprov, probemod, + probefunc); + stack(); + ustack(); +} + +linuxulator*:futex:linux_sys_futex:invalid_cmp_requeue_use, +linuxulator*:futex:linux_sys_futex:deprecated_requeue, +linuxulator*:futex:linux_set_robust_list:size_error +{ + printf("WARNING: %s:%s:%s:%s in application %s, maybe an application error?\n", + probename, probeprov, probemod, probefunc, execname); + stack(); + ustack(); +} + + +/* Per futex checks/statistics */ + 
+linuxulator*:futex:futex:create +{ + ++futex_count; + @max_futexes = max(futex_count); +} + +linuxulator*:futex:futex:destroy +/futex_count == 0/ +{ + printf("ERROR: Request to destroy a futex which was not created,\n"); + printf(" or this script was started after some futexes were\n"); + printf(" created. Stack trace:\n"); + stack(); + ustack(); +} + +linuxulator*:futex:futex:destroy +{ + --futex_count; +} + + +/* Internal locks */ + +linuxulator*:locks:futex_mtx:locked +{ + ++check[probefunc, arg0]; + @stats[probefunc] = count(); + + ts[probefunc] = timestamp; + spec[probefunc] = speculation(); + printf("Stacktrace of last lock operation of the %s:\n", probefunc); + stack(); +} + +linuxulator*:locks:futex_mtx:unlock +/check[probefunc, arg0] == 0/ +{ + printf("ERROR: unlock attempt of unlocked %s (%p),", probefunc, arg0); + printf(" missing SDT probe in kernel, or dtrace program started"); + printf(" while the %s was already held (race condition).", probefunc); + printf(" Stack trace follows:"); + stack(); +} + +linuxulator*:locks:futex_mtx:unlock +{ + discard(spec[probefunc]); + spec[probefunc] = 0; + --check[probefunc, arg0]; +} + +/* Timeout handling for internal locks */ + +tick-10s +/spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/ +{ + commit(spec["futex_mtx"]); + spec["futex_mtx"] = 0; +} + + +/* Timing statistics */ + +linuxulator*:futex::entry +{ + self->time[probefunc] = timestamp; + @calls[probeprov, execname, probefunc] = count(); +} + +linuxulator*:futex::return +/self->time[probefunc] != 0/ +{ + this->timediff = timestamp - self->time[probefunc]; + + @timestats[probeprov, execname, probefunc] = quantize(this->timediff); + @longest[probeprov, probefunc] = max(this->timediff); + + self->time[probefunc] = 0; +} + + +/* Statistics */ + +END +{ + printf("Number of locks per type:"); + printa(@stats); + printf("Number of maximum number of futexes in the futex list:"); + printa(@max_futexes); + printf("Number of futexes still existing: 
%d", futex_count); + printf("Number of calls per provider/application/kernel function:"); + printa(@calls); + printf("Wallclock-timing statistics per provider/application/kernel function (in ns):"); + printa(@timestats); + printf("Longest running (wallclock!) functions per provider (in ns):"); + printa(@longest); +} diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h index 8b5bcbfd87d5..799259c653d7 100644 --- a/sys/i386/linux/linux.h +++ b/sys/i386/linux/linux.h @@ -42,6 +42,7 @@ extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)td->td_proc->p_pid #define LMSG(fmt) "linux(%ld): "fmt"\n", (long)td->td_proc->p_pid +#define LINUX_DTRACE linuxulator #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_LINUX); diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c index 31bbf6fb32b8..ab77790c3e5b 100644 --- a/sys/i386/linux/linux_dummy.c +++ b/sys/i386/linux/linux_dummy.c @@ -29,14 +29,23 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" +#include "opt_kdtrace.h" + #include +#include +#include #include #include #include #include +#include #include +/* DTrace init */ +LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); + DUMMY(stime); DUMMY(fstat); DUMMY(olduname);