548 lines
13 KiB
Plaintext
548 lines
13 KiB
Plaintext
|
#!/usr/bin/ksh
|
||
|
#
|
||
|
# dexplorer - DTrace system explorer, runs a collection of scripts.
|
||
|
# Written using DTrace (Solaris 10 3/05).
|
||
|
#
|
||
|
# This program automatically runs a collection of DTrace scripts to examine
|
||
|
# many areas of the system, and places the output in a meaningful directory
|
||
|
# structure that is tar'd and gzip'd.
|
||
|
#
|
||
|
# $Id: dexplorer 3 2007-08-01 10:50:08Z brendan $
|
||
|
#
|
||
|
# USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
|
||
|
#
|
||
|
# -q # quiet mode
|
||
|
# -y # "yes", don't prompt for confirmation
|
||
|
# -D # don't delete output dir
|
||
|
# -T # don't create output tar.gz
|
||
|
# -d outputdir # output directory
|
||
|
# -i interval # interval for each sample
|
||
|
# eg,
|
||
|
# dexplorer # default is 5 second samples
|
||
|
# dexplorer -y -i30 # no prompting, with 30 second samples
|
||
|
#
|
||
|
# SEE ALSO: DTraceToolkit
|
||
|
#
|
||
|
# THANKS: David Visser, et al. for the idea and encouragement.
|
||
|
#
|
||
|
# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
|
||
|
#
|
||
|
# CDDL HEADER START
|
||
|
#
|
||
|
# The contents of this file are subject to the terms of the
|
||
|
# Common Development and Distribution License, Version 1.0 only
|
||
|
# (the "License"). You may not use this file except in compliance
|
||
|
# with the License.
|
||
|
#
|
||
|
# You can obtain a copy of the license at Docs/cddl1.txt
|
||
|
# or http://www.opensolaris.org/os/licensing.
|
||
|
# See the License for the specific language governing permissions
|
||
|
# and limitations under the License.
|
||
|
#
|
||
|
# CDDL HEADER END
|
||
|
#
|
||
|
# CODE:
|
||
|
#
|
||
|
# This is currently a monolithic script, and while it contains only
|
||
|
# a few dozen straightforward DTrace scripts I think it's desirable to
|
||
|
# keep it that way. The scripts themselves have been designed to be very
|
||
|
# generic (eg, switching on all sdt:::), and are aggregations to keep a
|
||
|
# limit on the size of the output.
|
||
|
#
|
||
|
# Author: Brendan Gregg [Sydney, Australia]
|
||
|
#
|
||
|
# 23-Jun-2005 Brendan Gregg Created this.
|
||
|
# 28-Jun-2005 " " Last update.
|
||
|
|
||
|
#
|
||
|
# Default variables
|
||
|
#
|
||
|
interval=5 # time of each sample
|
||
|
verbose=1 # print screen output
|
||
|
prompt=1 # prompt before run
|
||
|
tar=1 # create tar file
|
||
|
delete=1 # delete output dirs
|
||
|
dtrace=/usr/sbin/dtrace # path to dtrace
|
||
|
root=. # default output dir
|
||
|
PATH=/usr/bin:/usr/sbin # safe path
|
||
|
dir=de_`uname -n`_`date +%Y%m%d%H%M` # OUTPUT FILENAME
|
||
|
samples=20 # max number of tests
|
||
|
current=0 # current sample
|
||
|
|
||
|
#
|
||
|
# Process options
|
||
|
#
|
||
|
while getopts d:hi:qyDT name
|
||
|
do
|
||
|
case $name in
|
||
|
d) root=$OPTARG ;;
|
||
|
i) interval=$OPTARG ;;
|
||
|
q) verbose=0 ;;
|
||
|
y) prompt=0 ;;
|
||
|
D) delete=0 ;;
|
||
|
T) tar=0 ;;
|
||
|
h|?) cat <<-END >&2
|
||
|
USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
|
||
|
|
||
|
-q # quiet mode
|
||
|
-y # "yes", don't prompt for confirmation
|
||
|
-D # don't delete output dir
|
||
|
-T # don't create output tar.gz
|
||
|
-d outputdir # output directory
|
||
|
-i interval # interval for each sample
|
||
|
eg,
|
||
|
dexplorer # default is 5 second samples
|
||
|
dexplorer -y -i30 # no prompting, with 30 second samples
|
||
|
END
|
||
|
exit 1
|
||
|
esac
|
||
|
done
|
||
|
shift $(( OPTIND - 1 ))
|
||
|
|
||
|
#
|
||
|
# Confirm path
|
||
|
#
|
||
|
if [[ "$prompt" == "1" ]] ; then
|
||
|
if [[ "$root" == "." ]]; then
|
||
|
print "Output dir will be the current dir ($PWD)."
|
||
|
else
|
||
|
print "Output dir will be $root"
|
||
|
fi
|
||
|
print -n "Hit enter for yes, or type path: "
|
||
|
read ans junk
|
||
|
if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
|
||
|
print "WARNING: I didn't ask for \"$ans\"!"
|
||
|
print "\tI was asking for the path or just enter."
|
||
|
print "\tignoring \"$ans\"..."
|
||
|
fi
|
||
|
if [[ "$ans" != "" ]]; then
|
||
|
root=$ans
|
||
|
print "Output is now $root."
|
||
|
fi
|
||
|
fi
|
||
|
|
||
|
#
|
||
|
# Sanity checks
|
||
|
#
|
||
|
if [[ "$interval" == *[a-zA-Z]* ]]; then
|
||
|
print "ERROR2: Invalid interval $interval.\n"
|
||
|
print "Please use a number of seconds."
|
||
|
exit 2
|
||
|
fi
|
||
|
if (( ${#interval} < 1 )); then
|
||
|
print "ERROR3: Length of interval $interval too short.\n"
|
||
|
print "Minimum 1 second."
|
||
|
exit 3
|
||
|
fi
|
||
|
if [[ ! -d "$root" ]]; then
|
||
|
print "ERROR4: Output directory \"$root\" does not exist.\n"
|
||
|
print "Perhaps try a mkdir first?"
|
||
|
print "or use an existing dir, eg \"/tmp\""
|
||
|
exit 4
|
||
|
fi
|
||
|
if [[ ! -w "$root" ]]; then
|
||
|
print "ERROR5: Can't write to output directory \"$root\".\n"
|
||
|
print "Are you logged in as root?"
|
||
|
print "Perhaps try another directory, eg \"/tmp\""
|
||
|
exit 5
|
||
|
fi
|
||
|
if [[ `$dtrace -b1k -qn 'BEGIN { trace(pid); exit(0); }'` == "" ]]; then
|
||
|
print "ERROR6: Unable to run dtrace!\n"
|
||
|
print "Perhaps this is a permission problem? Try running as root."
|
||
|
exit 6
|
||
|
fi
|
||
|
|
||
|
# calculate total time
|
||
|
(( total = interval * samples ))
|
||
|
if (( total > 180 )); then
|
||
|
(( total = total / 60 ))
|
||
|
total="$total minutes"
|
||
|
else
|
||
|
total="$total seconds"
|
||
|
fi
|
||
|
|
||
|
#
|
||
|
# Common Functions
|
||
|
#
|
||
|
function decho {
|
||
|
if (( verbose )); then print "$*"; fi
|
||
|
}
|
||
|
clean="sed /^\$/d"
|
||
|
header='dtrace:::BEGIN {
|
||
|
printf("%Y, ", walltimestamp);
|
||
|
printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
|
||
|
`utsname.release, `utsname.version, `utsname.machine);
|
||
|
printf("%d secs\n",'$interval');
|
||
|
}
|
||
|
profile:::tick-'$interval'sec { exit(0); }
|
||
|
'
|
||
|
function dstatus {
|
||
|
if (( verbose )); then
|
||
|
(( percent = current * 100 / samples ))
|
||
|
printf "%3d%% $*\n" $percent
|
||
|
(( current = current + 1 ))
|
||
|
fi
|
||
|
}
|
||
|
|
||
|
########################################
|
||
|
# START #
|
||
|
########################################
|
||
|
|
||
|
#
|
||
|
# Make dirs
|
||
|
#
|
||
|
err=0
|
||
|
cd $root
|
||
|
(( err = err + $? ))
|
||
|
mkdir $dir
|
||
|
(( err = err + $? ))
|
||
|
cd $dir
|
||
|
(( err = err + $? ))
|
||
|
base1=${PWD##*/}
|
||
|
base2=${dir##*/}
|
||
|
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
|
||
|
print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
|
||
|
print "Check directories before rerunning."
|
||
|
exit 7
|
||
|
fi
|
||
|
mkdir Cpu
|
||
|
mkdir Disk
|
||
|
mkdir Mem
|
||
|
mkdir Net
|
||
|
mkdir Proc
|
||
|
mkdir Info
|
||
|
|
||
|
#
|
||
|
# Create Log
|
||
|
#
|
||
|
decho "Starting dexplorer ver 0.76."
|
||
|
decho "Sample interval is $interval seconds. Total run is > $total."
|
||
|
( print "dexplorer ver 0.76\n------------------"
|
||
|
print -n "System: "
|
||
|
uname -a
|
||
|
print -n "Start: "
|
||
|
date ) > log
|
||
|
|
||
|
#
|
||
|
# Capture Standard Info
|
||
|
#
|
||
|
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,'
|
||
|
args=$args'class,vsz,rss,time,pcpu,pmem,args'
|
||
|
uname -a > Info/uname-a # System
|
||
|
psrinfo -v > Info/psrinfo-v # CPU
|
||
|
prtconf > Info/prtconf # Memory (+ devices)
|
||
|
df -k > Info/df-k # Disk
|
||
|
ifconfig -a > Info/ifconfig-a # Network
|
||
|
ps -eo $args > Info/ps-o # Processes
|
||
|
uptime > Info/uptime # Load
|
||
|
|
||
|
#
|
||
|
# Cpu Tests, DTrace
|
||
|
#
|
||
|
|
||
|
dstatus "Interrupts by CPU..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sdt:::interrupt-start { @num[cpu] = count(); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-16s %16s\n", "CPU", "INTERRUPTS");
|
||
|
printa("%-16d %@16d\n", @num);
|
||
|
}
|
||
|
' | $clean > Cpu/interrupt_by_cpu
|
||
|
|
||
|
dstatus "Interrupt times..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sdt:::interrupt-start { self->ts = vtimestamp; }
|
||
|
sdt:::interrupt-complete
|
||
|
/self->ts && arg0 != 0/
|
||
|
{
|
||
|
this->devi = (struct dev_info *)arg0;
|
||
|
self->name = this->devi != 0 ?
|
||
|
stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
|
||
|
this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
|
||
|
@num[self->name, this->inst] = sum(vtimestamp - self->ts);
|
||
|
self->name = 0;
|
||
|
}
|
||
|
sdt:::interrupt-complete { self->ts = 0; }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%11s %16s\n", "DEVICE", "TIME (ns)");
|
||
|
printa("%10s%-3d %@16d\n", @num);
|
||
|
}
|
||
|
' | $clean > Cpu/interrupt_time
|
||
|
|
||
|
dstatus "Dispatcher queue length by CPU..."
|
||
|
$dtrace -qn "$header"'
|
||
|
profile:::profile-1000
|
||
|
{
|
||
|
this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
|
||
|
@length[cpu] = lquantize(this->num, 0, 100, 1);
|
||
|
}
|
||
|
dtrace:::END { printa(" CPU %d%@d\n", @length); }
|
||
|
' | $clean > Cpu/dispqlen_by_cpu
|
||
|
|
||
|
dstatus "Sdt counts..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sdt:::{ @num[probefunc, probename] = count(); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
|
||
|
printa("%-32s %-32s %@10d\n", @num);
|
||
|
}
|
||
|
' | $clean > Cpu/sdt_count
|
||
|
|
||
|
#
|
||
|
# Disk Tests, DTrace
|
||
|
#
|
||
|
|
||
|
dstatus "Pages paged in by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
|
||
|
printa("%6d %-16s %@16d\n", @pg);
|
||
|
}
|
||
|
' | $clean > Disk/pgpgin_by_process
|
||
|
|
||
|
dstatus "Files opened successfully count..."
|
||
|
$dtrace -qn "$header"'
|
||
|
syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
|
||
|
syscall::open*:return /self->ok && arg0 != -1/
|
||
|
{
|
||
|
@num[self->file] = count();
|
||
|
}
|
||
|
syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-64s %8s\n", "FILE", "COUNT");
|
||
|
printa("%-64s %@8d\n", @num);
|
||
|
}
|
||
|
' | $clean > Disk/fileopen_count
|
||
|
|
||
|
dstatus "Disk I/O size distribution by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
|
||
|
' | $clean > Disk/sizedist_by_process
|
||
|
|
||
|
#
|
||
|
# Mem Tests, DTrace
|
||
|
#
|
||
|
|
||
|
dstatus "Minor faults by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
|
||
|
printa("%6d %-16s %@16d\n", @mem);
|
||
|
}
|
||
|
' | $clean > Mem/minf_by_process
|
||
|
|
||
|
|
||
|
dstatus "Vminfo data by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
vminfo::: { @data[pid, execname, probename] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %-16s %16s\n",
|
||
|
"PID", "CMD", "STATISTIC", "VALUE");
|
||
|
printa("%6d %-16s %-16s %@16d\n", @data);
|
||
|
}
|
||
|
' | $clean > Mem/vminfo_by_process
|
||
|
|
||
|
#
|
||
|
# Net Tests, DTrace
|
||
|
#
|
||
|
|
||
|
dstatus "Mib data by mib statistic..."
|
||
|
$dtrace -qn "$header"'
|
||
|
mib::: { @data[probename] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-32s %16s\n", "STATISTIC", "VALUE");
|
||
|
printa("%-32s %@16d\n", @data);
|
||
|
}
|
||
|
' | $clean > Net/mib_data
|
||
|
|
||
|
dstatus "TCP write bytes by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
fbt:ip:tcp_output:entry
|
||
|
{
|
||
|
this->size = msgdsize(args[1]);
|
||
|
@size[pid, execname] = sum(this->size);
|
||
|
}
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
|
||
|
printa("%6d %-16s %@12d\n", @size);
|
||
|
}
|
||
|
' | $clean > Net/tcpw_by_process
|
||
|
|
||
|
#
|
||
|
# Proc Tests, DTrace
|
||
|
#
|
||
|
|
||
|
dstatus "Sample process @ 1000 Hz..."
|
||
|
$dtrace -qn "$header"'
|
||
|
profile:::profile-1000
|
||
|
{
|
||
|
@num[pid, curpsinfo->pr_psargs] = count();
|
||
|
}
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
|
||
|
printa("%6d %@12d %S\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/sample_process
|
||
|
|
||
|
dstatus "Syscall count by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
syscall:::entry { @num[pid, execname, probefunc] = count(); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-24s %-24s %8s\n",
|
||
|
"PID", "CMD", "SYSCALL", "COUNT");
|
||
|
printa("%6d %-24s %-24s %@8d\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/syscall_by_process
|
||
|
|
||
|
dstatus "Syscall count by syscall..."
|
||
|
$dtrace -qn "$header"'
|
||
|
syscall:::entry { @num[probefunc] = count(); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-32s %16s\n", "SYSCALL", "COUNT");
|
||
|
printa("%-32s %@16d\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/syscall_count
|
||
|
|
||
|
dstatus "Read bytes by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
|
||
|
printa("%6d %-16s %@16d\n", @bytes);
|
||
|
}
|
||
|
' | $clean > Proc/readb_by_process
|
||
|
|
||
|
dstatus "Write bytes by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
|
||
|
printa("%6d %-16s %@16d\n", @bytes);
|
||
|
}
|
||
|
' | $clean > Proc/writeb_by_process
|
||
|
|
||
|
dstatus "Sysinfo counts by process..."
|
||
|
$dtrace -qn "$header"'
|
||
|
sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %-16s %16s\n",
|
||
|
"PID", "CMD", "STATISTIC", "COUNT");
|
||
|
printa("%6d %-16s %-16s %@16d\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/sysinfo_by_process
|
||
|
|
||
|
dstatus "New process counts with arguments..."
|
||
|
$dtrace -qn "$header"'
|
||
|
proc:::exec-success
|
||
|
{
|
||
|
@num[pid, ppid, curpsinfo->pr_psargs] = count();
|
||
|
}
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
|
||
|
printa("%6d %6d %@8d %S\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/newprocess_count
|
||
|
|
||
|
dstatus "Signal counts..."
|
||
|
$dtrace -qn "$header"'
|
||
|
proc:::signal-send {
|
||
|
@num[execname,args[2],stringof(args[1]->pr_fname)] = count();
|
||
|
}
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%-16s %-8s %-16s %8s\n",
|
||
|
"FROM", "SIG", "TO", "COUNT");
|
||
|
printa("%-16s %-8d %-16s %@8d\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/signal_count
|
||
|
|
||
|
dstatus "Syscall error counts..."
|
||
|
$dtrace -qn "$header"'
|
||
|
syscall:::return /(int)arg0 == -1/
|
||
|
{
|
||
|
@num[pid, execname, probefunc, errno] = count();
|
||
|
}
|
||
|
dtrace:::END
|
||
|
{
|
||
|
printf("%6s %-16s %-32s %-6s %8s\n",
|
||
|
"PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
|
||
|
printa("%6d %-16s %-32s %-6d %@8d\n", @num);
|
||
|
}
|
||
|
' | $clean > Proc/syscall_errors
|
||
|
|
||
|
|
||
|
###########
|
||
|
# Done
|
||
|
#
|
||
|
( print -n "End: "
|
||
|
date ) >> log
|
||
|
decho "100% Done."
|
||
|
if (( tar )); then
|
||
|
cd ..
|
||
|
tar cf $dir.tar $dir
|
||
|
gzip $dir.tar
|
||
|
decho "File is $dir.tar.gz"
|
||
|
fi
|
||
|
if (( delete && tar )); then
|
||
|
cd $dir
|
||
|
# this could be all an "rm -r $dir", but since it will be run
|
||
|
# as root on production servers - lets be analy cautious,
|
||
|
rm Cpu/interrupt_by_cpu
|
||
|
rm Cpu/interrupt_time
|
||
|
rm Cpu/dispqlen_by_cpu
|
||
|
rm Cpu/sdt_count
|
||
|
rm Disk/pgpgin_by_process
|
||
|
rm Disk/fileopen_count
|
||
|
rm Disk/sizedist_by_process
|
||
|
rm Mem/minf_by_process
|
||
|
rm Mem/vminfo_by_process
|
||
|
rm Net/mib_data
|
||
|
rm Net/tcpw_by_process
|
||
|
rm Proc/sample_process
|
||
|
rm Proc/syscall_by_process
|
||
|
rm Proc/syscall_count
|
||
|
rm Proc/readb_by_process
|
||
|
rm Proc/writeb_by_process
|
||
|
rm Proc/sysinfo_by_process
|
||
|
rm Proc/newprocess_count
|
||
|
rm Proc/signal_count
|
||
|
rm Proc/syscall_errors
|
||
|
rmdir Cpu
|
||
|
rmdir Disk
|
||
|
rmdir Mem
|
||
|
rmdir Net
|
||
|
rmdir Proc
|
||
|
rm Info/uname-a
|
||
|
rm Info/psrinfo-v
|
||
|
rm Info/prtconf
|
||
|
rm Info/df-k
|
||
|
rm Info/ifconfig-a
|
||
|
rm Info/ps-o
|
||
|
rm Info/uptime
|
||
|
rmdir Info
|
||
|
rm log
|
||
|
cd ..
|
||
|
rmdir $dir
|
||
|
else
|
||
|
decho "Directory is $dir"
|
||
|
fi
|
||
|
|