Improved non-statistical (GUPROF) profiling:

- use a more accurate and more efficient method of compensating for
  overheads.  The old method counted too much time against leaf
  functions.
- normally use the Pentium timestamp counter if available.
  On Pentiums, the times are now accurate to within a couple of cpu
  clock cycles per function call in the (unlikely) event that there
  are no cache misses in or caused by the profiling code.
- optionally use an arbitrary Pentium event counter if available.
- optionally regress to using the i8254 counter.
- scaled the i8254 counter by a factor of 128.  Now the i8254 counters
  overflow slightly faster than the TSC counters for a 150MHz Pentium :-)
  (after about 16 seconds).  This is to avoid fractional overheads.

files.i386:
perfmon.c temporarily has to be classified as a profiling-routine
because a couple of functions in it may be called from profiling code.

options.i386:
- I586_CTR_GUPROF is currently unused (oops).
- I586_PMC_GUPROF should be something like 0x70000 to enable (but not
  use unless prof_machdep.c is changed) support for Pentium event
  counters.  7 is a control mode and the counter number 0 is somewhere
  in the 0000 bits (see perfmon.h for the encoding).

profile.h:
- added declarations.
- cleaned up separation of user mode declarations.

prof_machdep.c:
Mostly clock-select changes.  The default clock can be changed by
editing kmem.  There should be a sysctl for this.

subr_prof.c:
- added copyright.
- calibrate overheads for the new method.
- documented new method.
- fixed races and machine dependencies in start/stop code.

mcount.c:
Use the new overhead compensation method.

gmon.h:
- changed GPROF4 counter type from unsigned to int.  Oops, this should
  be machine-dependent and/or int32_t.
- reorganized overhead counters.

Submitted by:	Pentium event counter changes mostly by wollman
This commit is contained in:
Bruce Evans 1996-10-17 19:32:31 +00:00
parent cf3c4df72d
commit d6b9e17eb5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=19000
11 changed files with 605 additions and 174 deletions

View File

@ -1,17 +1,64 @@
/*
* NEED A COPYRIGHT NOPTICE HERE
/*-
* Copyright (c) 1996 Bruce D. Evans.
* All rights reserved.
*
* $Id$
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $
*/
#ifdef GUPROF
#include "opt_cpu.h"
#include "opt_i586_guprof.h"
#include "opt_perfmon.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/gmon.h>
#include <machine/clock.h>
#include <machine/perfmon.h>
#include <machine/profile.h>
#endif
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
#ifdef GUPROF
extern u_int cputime __P((void));
#define CPUTIME_CLOCK_UNINITIALIZED 0
#define CPUTIME_CLOCK_I8254 1
#define CPUTIME_CLOCK_I586_CTR 2
#define CPUTIME_CLOCK_I586_PMC 3
#define CPUTIME_CLOCK_I8254_SHIFT 7
int cputime_bias = 1; /* initialize for locality of reference */
static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
#ifdef I586_PMC_GUPROF
static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF;
static int cputime_clock_pmc_init;
static struct gmonparam saved_gmp;
#endif
#endif /* GUPROF */
#ifdef __GNUC__
asm("
@ -52,13 +99,13 @@ GMON_PROF_OFF = 3
#
movl (%esp),%eax
pushf
pushfl
pushl %eax
pushl %edx
cli
call _mcount
addl $8,%esp
popf
popfl
Lmcount_exit:
ret
");
@ -94,12 +141,12 @@ GMON_PROF_HIRES = 4
pushl %edx
pushl %eax
movl 8(%esp),%eax
pushf
pushfl
pushl %eax
cli
call _mexitcount
addl $4,%esp
popf
popfl
popl %eax
popl %edx
Lmexitcount_exit:
@ -113,20 +160,48 @@ GMON_PROF_HIRES = 4
* Return the time elapsed since the last call. The units are machine-
* dependent.
*/
u_int
int
cputime()
{
u_int count;
u_int delta;
u_char low;
int delta;
#ifdef I586_PMC_GUPROF
u_quad_t event_count;
#endif
u_char high, low;
static u_int prev_count;
#if defined(I586_CPU) || defined(I686_CPU)
if (cputime_clock == CPUTIME_CLOCK_I586_CTR) {
count = (u_int)rdtsc();
delta = (int)(count - prev_count);
prev_count = count;
return (delta);
}
#ifdef I586_PMC_GUPROF
if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
/*
* XXX perfmon_read() should be inlined so that the
* perfmon module doesn't need to be compiled with
* profiling disabled and so that it is fast.
*/
perfmon_read(0, &event_count);
count = (u_int)event_count;
delta = (int)(count - prev_count);
prev_count = count;
return (delta);
}
#endif /* I586_PMC_GUPROF */
#endif /* I586_CPU or I686_CPU */
/*
* Read the current value of the 8254 timer counter 0.
*/
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
low = inb(TIMER_CNTR0);
count = low | (inb(TIMER_CNTR0) << 8);
high = inb(TIMER_CNTR0);
count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
/*
* The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
@ -140,10 +215,75 @@ cputime()
delta = prev_count - count;
prev_count = count;
if ((int) delta <= 0)
return (delta + timer0_max_count);
return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT));
return (delta);
}
#else /* not GUPROF */
/*
* The start and stop routines need not be here since we turn off profiling
* before calling them. They are here for convenience.
*/
/*
 * Prepare the clock used for high-resolution (GUPROF) profiling.
 *
 * On the first call a default clock is chosen: the i586 cycle counter when
 * i586_ctr_freq is non-zero, otherwise the i8254.  gp->profrate is then set
 * to match the selected clock.  For the Pentium event counter the rate is
 * only changed when the counter could be set up and started; in that case
 * *gp is snapshotted into saved_gmp (for stopguprof()) before the overhead
 * fields are zeroed.  Finally cputime_bias is cleared and cputime() is
 * called once so that its notion of the "previous count" is current.
 */
void
startguprof(gp)
	struct gmonparam *gp;
{
	/* Pick a default clock the first time through. */
	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
#if defined(I586_CPU) || defined(I686_CPU)
		cputime_clock = (i586_ctr_freq != 0) ?
		    CPUTIME_CLOCK_I586_CTR : CPUTIME_CLOCK_I8254;
#else
		cputime_clock = CPUTIME_CLOCK_I8254;
#endif
	}

	/* Start from the scaled i8254 rate; other clocks override it below. */
	gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;

#if defined(I586_CPU) || defined(I686_CPU)
	switch (cputime_clock) {
	case CPUTIME_CLOCK_I586_CTR:
		gp->profrate = i586_ctr_freq;
		break;
#ifdef I586_PMC_GUPROF
	case CPUTIME_CLOCK_I586_PMC:
		/* Leave the rate alone unless counter 0 can be configured. */
		if (!perfmon_avail() ||
		    perfmon_setup(0, cputime_clock_pmc_conf) != 0)
			break;
		if (perfmon_start(0) != 0) {
			/* Could not start it: give the counter back. */
			perfmon_fini(0);
			break;
		}

		/* XXX 1 event == 1 us. */
		gp->profrate = 1000000;

		/* Snapshot first so stopguprof() can restore the overheads. */
		saved_gmp = *gp;

		/* Zap overheads.  They are invalid. */
		gp->cputime_overhead = 0;
		gp->mcount_overhead = 0;
		gp->mcount_post_overhead = 0;
		gp->mcount_pre_overhead = 0;
		gp->mexitcount_overhead = 0;
		gp->mexitcount_post_overhead = 0;
		gp->mexitcount_pre_overhead = 0;

		cputime_clock_pmc_init = TRUE;
		break;
#endif /* I586_PMC_GUPROF */
	default:
		break;
	}
#endif /* I586_CPU or I686_CPU */

	/* Drop any stale bias and take a first reading of the clock. */
	cputime_bias = 0;
	(void)cputime();
}
/*
 * Undo the Pentium event counter setup done by startguprof().
 *
 * Does nothing unless both PERFMON and I586_PMC_GUPROF are configured and
 * startguprof() marked the counter as initialized.  Otherwise *gp is
 * restored from the snapshot in saved_gmp, perfmon_fini(0) is called and
 * the initialized flag is cleared.
 */
void
stopguprof(gp)
	struct gmonparam *gp;
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (!cputime_clock_pmc_init)
		return;

	*gp = saved_gmp;
	perfmon_fini(0);
	cputime_clock_pmc_init = FALSE;
#endif
}
#else /* !GUPROF */
#ifdef __GNUC__
asm("
.text

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
* $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $
* $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $
*/
#ifndef _MACHINE_PROFILE_H_
@ -97,18 +97,44 @@ typedef u_int fptrint_t;
*/
typedef int fptrdiff_t;
__BEGIN_DECLS
#ifdef KERNEL
void mcount __P((fptrint_t frompc, fptrint_t selfpc));
#else
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
#endif
#ifdef GUPROF
u_int cputime __P((void));
void mexitcount __P((fptrint_t selfpc));
#endif
struct gmonparam;
void nullfunc_loop_profiled __P((void));
void nullfunc_profiled __P((void));
void startguprof __P((struct gmonparam *p));
void stopguprof __P((struct gmonparam *p));
#else
#define startguprof(p)
#define stopguprof(p)
#endif /* GUPROF */
#else /* !KERNEL */
#include <sys/cdefs.h>
__BEGIN_DECLS
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
__END_DECLS
#endif /* KERNEL */
#ifdef GUPROF
/* XXX doesn't quite work outside kernel yet. */
extern int cputime_bias;
__BEGIN_DECLS
int cputime __P((void));
void empty_loop __P((void));
void mexitcount __P((fptrint_t selfpc));
void nullfunc __P((void));
void nullfunc_loop __P((void));
__END_DECLS
#endif
#endif /* !_MACHINE_PROFILE_H_ */

View File

@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $
# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@ -51,6 +51,7 @@ i386/i386/machdep.c standard
i386/i386/math_emulate.c optional math_emulate
i386/i386/mem.c standard
i386/i386/microtime.s standard
i386/i386/perfmon.c optional perfmon profiling-routine
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
i386/i386/procfs_machdep.c standard

View File

@ -1,4 +1,4 @@
# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $
# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $
BOUNCEPAGES opt_bounce.h
USER_LDT
MATH_EMULATE opt_math_emulate.h
@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h
COM_ESP opt_sio.h
COM_MULTIPORT opt_sio.h
DSI_SOFT_MODEM opt_sio.h
I586_CTR_GUPROF opt_i586_guprof.h
I586_PMC_GUPROF opt_i586_guprof.h
FAT_CURSOR opt_pcvt.h
PCVT_FREEBSD opt_pcvt.h
PCVT_SCANSET opt_pcvt.h

View File

@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $
# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@ -51,6 +51,7 @@ i386/i386/machdep.c standard
i386/i386/math_emulate.c optional math_emulate
i386/i386/mem.c standard
i386/i386/microtime.s standard
i386/i386/perfmon.c optional perfmon profiling-routine
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
i386/i386/procfs_machdep.c standard

View File

@ -1,4 +1,4 @@
# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $
# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $
BOUNCEPAGES opt_bounce.h
USER_LDT
MATH_EMULATE opt_math_emulate.h
@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h
COM_ESP opt_sio.h
COM_MULTIPORT opt_sio.h
DSI_SOFT_MODEM opt_sio.h
I586_CTR_GUPROF opt_i586_guprof.h
I586_PMC_GUPROF opt_i586_guprof.h
FAT_CURSOR opt_pcvt.h
PCVT_FREEBSD opt_pcvt.h
PCVT_SCANSET opt_pcvt.h

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
* $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $
* $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $
*/
#ifndef _MACHINE_PROFILE_H_
@ -97,18 +97,44 @@ typedef u_int fptrint_t;
*/
typedef int fptrdiff_t;
__BEGIN_DECLS
#ifdef KERNEL
void mcount __P((fptrint_t frompc, fptrint_t selfpc));
#else
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
#endif
#ifdef GUPROF
u_int cputime __P((void));
void mexitcount __P((fptrint_t selfpc));
#endif
struct gmonparam;
void nullfunc_loop_profiled __P((void));
void nullfunc_profiled __P((void));
void startguprof __P((struct gmonparam *p));
void stopguprof __P((struct gmonparam *p));
#else
#define startguprof(p)
#define stopguprof(p)
#endif /* GUPROF */
#else /* !KERNEL */
#include <sys/cdefs.h>
__BEGIN_DECLS
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
__END_DECLS
#endif /* KERNEL */
#ifdef GUPROF
/* XXX doesn't quite work outside kernel yet. */
extern int cputime_bias;
__BEGIN_DECLS
int cputime __P((void));
void empty_loop __P((void));
void mexitcount __P((fptrint_t selfpc));
void nullfunc __P((void));
void nullfunc_loop __P((void));
__END_DECLS
#endif
#endif /* !_MACHINE_PROFILE_H_ */

View File

@ -1,17 +1,64 @@
/*
* NEED A COPYRIGHT NOPTICE HERE
/*-
* Copyright (c) 1996 Bruce D. Evans.
* All rights reserved.
*
* $Id$
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $
*/
#ifdef GUPROF
#include "opt_cpu.h"
#include "opt_i586_guprof.h"
#include "opt_perfmon.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/gmon.h>
#include <machine/clock.h>
#include <machine/perfmon.h>
#include <machine/profile.h>
#endif
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
#ifdef GUPROF
extern u_int cputime __P((void));
#define CPUTIME_CLOCK_UNINITIALIZED 0
#define CPUTIME_CLOCK_I8254 1
#define CPUTIME_CLOCK_I586_CTR 2
#define CPUTIME_CLOCK_I586_PMC 3
#define CPUTIME_CLOCK_I8254_SHIFT 7
int cputime_bias = 1; /* initialize for locality of reference */
static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
#ifdef I586_PMC_GUPROF
static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF;
static int cputime_clock_pmc_init;
static struct gmonparam saved_gmp;
#endif
#endif /* GUPROF */
#ifdef __GNUC__
asm("
@ -52,13 +99,13 @@ GMON_PROF_OFF = 3
#
movl (%esp),%eax
pushf
pushfl
pushl %eax
pushl %edx
cli
call _mcount
addl $8,%esp
popf
popfl
Lmcount_exit:
ret
");
@ -94,12 +141,12 @@ GMON_PROF_HIRES = 4
pushl %edx
pushl %eax
movl 8(%esp),%eax
pushf
pushfl
pushl %eax
cli
call _mexitcount
addl $4,%esp
popf
popfl
popl %eax
popl %edx
Lmexitcount_exit:
@ -113,20 +160,48 @@ GMON_PROF_HIRES = 4
* Return the time elapsed since the last call. The units are machine-
* dependent.
*/
u_int
int
cputime()
{
u_int count;
u_int delta;
u_char low;
int delta;
#ifdef I586_PMC_GUPROF
u_quad_t event_count;
#endif
u_char high, low;
static u_int prev_count;
#if defined(I586_CPU) || defined(I686_CPU)
if (cputime_clock == CPUTIME_CLOCK_I586_CTR) {
count = (u_int)rdtsc();
delta = (int)(count - prev_count);
prev_count = count;
return (delta);
}
#ifdef I586_PMC_GUPROF
if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
/*
* XXX perfmon_read() should be inlined so that the
* perfmon module doesn't need to be compiled with
* profiling disabled and so that it is fast.
*/
perfmon_read(0, &event_count);
count = (u_int)event_count;
delta = (int)(count - prev_count);
prev_count = count;
return (delta);
}
#endif /* I586_PMC_GUPROF */
#endif /* I586_CPU or I686_CPU */
/*
* Read the current value of the 8254 timer counter 0.
*/
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
low = inb(TIMER_CNTR0);
count = low | (inb(TIMER_CNTR0) << 8);
high = inb(TIMER_CNTR0);
count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
/*
* The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
@ -140,10 +215,75 @@ cputime()
delta = prev_count - count;
prev_count = count;
if ((int) delta <= 0)
return (delta + timer0_max_count);
return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT));
return (delta);
}
#else /* not GUPROF */
/*
* The start and stop routines need not be here since we turn off profiling
* before calling them. They are here for convenience.
*/
/*
 * Prepare the clock used for high-resolution (GUPROF) profiling.
 *
 * On the first call a default clock is chosen: the i586 cycle counter when
 * i586_ctr_freq is non-zero, otherwise the i8254.  gp->profrate is then set
 * to match the selected clock.  For the Pentium event counter the rate is
 * only changed when the counter could be set up and started; in that case
 * *gp is snapshotted into saved_gmp (for stopguprof()) before the overhead
 * fields are zeroed.  Finally cputime_bias is cleared and cputime() is
 * called once so that its notion of the "previous count" is current.
 */
void
startguprof(gp)
	struct gmonparam *gp;
{
	/* Pick a default clock the first time through. */
	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
#if defined(I586_CPU) || defined(I686_CPU)
		cputime_clock = (i586_ctr_freq != 0) ?
		    CPUTIME_CLOCK_I586_CTR : CPUTIME_CLOCK_I8254;
#else
		cputime_clock = CPUTIME_CLOCK_I8254;
#endif
	}

	/* Start from the scaled i8254 rate; other clocks override it below. */
	gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;

#if defined(I586_CPU) || defined(I686_CPU)
	switch (cputime_clock) {
	case CPUTIME_CLOCK_I586_CTR:
		gp->profrate = i586_ctr_freq;
		break;
#ifdef I586_PMC_GUPROF
	case CPUTIME_CLOCK_I586_PMC:
		/* Leave the rate alone unless counter 0 can be configured. */
		if (!perfmon_avail() ||
		    perfmon_setup(0, cputime_clock_pmc_conf) != 0)
			break;
		if (perfmon_start(0) != 0) {
			/* Could not start it: give the counter back. */
			perfmon_fini(0);
			break;
		}

		/* XXX 1 event == 1 us. */
		gp->profrate = 1000000;

		/* Snapshot first so stopguprof() can restore the overheads. */
		saved_gmp = *gp;

		/* Zap overheads.  They are invalid. */
		gp->cputime_overhead = 0;
		gp->mcount_overhead = 0;
		gp->mcount_post_overhead = 0;
		gp->mcount_pre_overhead = 0;
		gp->mexitcount_overhead = 0;
		gp->mexitcount_post_overhead = 0;
		gp->mexitcount_pre_overhead = 0;

		cputime_clock_pmc_init = TRUE;
		break;
#endif /* I586_PMC_GUPROF */
	default:
		break;
	}
#endif /* I586_CPU or I686_CPU */

	/* Drop any stale bias and take a first reading of the clock. */
	cputime_bias = 0;
	(void)cputime();
}
/*
 * Undo the Pentium event counter setup done by startguprof().
 *
 * Does nothing unless both PERFMON and I586_PMC_GUPROF are configured and
 * startguprof() marked the counter as initialized.  Otherwise *gp is
 * restored from the snapshot in saved_gmp, perfmon_fini(0) is called and
 * the initialized flag is cleared.
 */
void
stopguprof(gp)
	struct gmonparam *gp;
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (!cputime_clock_pmc_init)
		return;

	*gp = saved_gmp;
	perfmon_fini(0);
	cputime_clock_pmc_init = FALSE;
#endif
}
#else /* !GUPROF */
#ifdef __GNUC__
asm("
.text

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
* $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $
* $Id: subr_prof.c,v 1.16 1995/12/29 15:29:08 bde Exp $
*/
#include <sys/param.h>
@ -56,6 +56,22 @@ struct gmonparam _gmonparam = { GMON_PROF_OFF };
extern char btext[];
extern char etext[];
#ifdef GUPROF
/*
 * Calibration helper: call nullfunc_profiled() CALIB_SCALE times.
 *
 * kmstartup() runs this loop while calibrating the profiling overheads and
 * afterwards sums the histogram counters that cover the start of this
 * function into nullfunc_loop_profiled_time.
 *
 * NOTE(review): presumably this file is built with profiling enabled so
 * that every call here passes through mcount()/mexitcount() -- confirm.
 */
void
nullfunc_loop_profiled()
{
int i;
/* The body is nothing but the call; the call's cost is what is measured. */
for (i = 0; i < CALIB_SCALE; i++)
nullfunc_profiled();
}
/*
 * Deliberately empty function; the call target of nullfunc_loop_profiled().
 */
void
nullfunc_profiled()
{
}
#endif /* GUPROF */
static void
kmstartup(dummy)
void *dummy;
@ -63,8 +79,14 @@ kmstartup(dummy)
char *cp;
struct gmonparam *p = &_gmonparam;
#ifdef GUPROF
fptrint_t kmstartup_addr;
int cputime_overhead;
int empty_loop_time;
int i;
fptrint_t kmstartup_addr;
int mcount_overhead;
int mexitcount_overhead;
int nullfunc_loop_overhead;
int nullfunc_loop_profiled_time;
#endif
/*
@ -74,7 +96,7 @@ kmstartup(dummy)
p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
printf("Profiling kernel, textsize=%d [%x..%x]\n",
printf("Profiling kernel, textsize=%lu [%x..%x]\n",
p->textsize, p->lowpc, p->highpc);
p->kcountsize = p->textsize / HISTFRACTION;
p->hashfraction = HASHFRACTION;
@ -99,41 +121,56 @@ kmstartup(dummy)
p->froms = (u_short *)cp;
#ifdef GUPROF
/*
* Initialize pointers to overhead counters.
*/
/* Initialize pointers to overhead counters. */
p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime));
p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount));
p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount));
/*
* Determine overheads.
* Disable interrupts to avoid interference while we calibrate
* things.
*/
disable_intr();
/*
* Determine overheads.
* XXX this needs to be repeated for each useful timer/counter.
*/
cputime_overhead = 0;
startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
cputime_overhead += cputime();
empty_loop();
startguprof(p);
empty_loop();
empty_loop_time = cputime();
nullfunc_loop_profiled();
/*
* Start profiling. There won't be any normal function calls since
* interrupts are disabled, but we will call the profiling routines
* directly to determine their overheads.
*/
p->state = GMON_PROF_HIRES;
p->cputime_overhead = 0;
(void)cputime();
for (i = 0; i < CALIB_SCALE; i++)
p->cputime_overhead += cputime();
startguprof(p);
nullfunc_loop_profiled();
(void)cputime();
startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
/*
* Underestimate slightly by always calling __mcount, never
* mcount.
*/
asm("pushl %0; call __mcount; popl %%ecx"
:
: "i" (kmstartup)
: "i" (profil)
: "ax", "bx", "cx", "dx", "memory");
#else
#error
#endif
p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup));
mcount_overhead = KCOUNT(p, PC_TO_I(p, profil));
(void)cputime();
startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
asm("call mexitcount; 1:"
@ -142,25 +179,96 @@ kmstartup(dummy)
#else
#error
#endif
p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
p->state = GMON_PROF_OFF;
stopguprof(p);
enable_intr();
p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead;
p->mexitcount_overhead_sub = p->mexitcount_overhead
- p->cputime_overhead;
printf("Profiling overheads: %u+%u %u+%u\n",
p->cputime_overhead, p->mcount_overhead_sub,
p->cputime_overhead, p->mexitcount_overhead_sub);
p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE;
p->cputime_overhead /= CALIB_SCALE;
p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE;
p->mcount_overhead_sub /= CALIB_SCALE;
p->mcount_overhead /= CALIB_SCALE;
p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE;
p->mexitcount_overhead_sub /= CALIB_SCALE;
p->mexitcount_overhead /= CALIB_SCALE;
nullfunc_loop_profiled_time = 0;
for (i = 0; i < 28; i += sizeof(HISTCOUNTER)) {
int x;
x = KCOUNT(p, PC_TO_I(p,
(fptrint_t)nullfunc_loop_profiled + i));
nullfunc_loop_profiled_time += x;
printf("leaf[%d] = %d sum %d\n",
i, x, nullfunc_loop_profiled_time);
}
#define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE)
#define c2n(count, freq) ((int)((count) * 1000000000LL / freq))
printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n",
CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)),
CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)),
CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)),
CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)),
CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate)));
cputime_overhead -= empty_loop_time;
mcount_overhead -= empty_loop_time;
mexitcount_overhead -= empty_loop_time;
/*-
* Profiling overheads are determined by the times between the
* following events:
* MC1: mcount() is called
* MC2: cputime() (called from mcount()) latches the timer
* MC3: mcount() completes
* ME1: mexitcount() is called
* ME2: cputime() (called from mexitcount()) latches the timer
* ME3: mexitcount() completes.
* The times between the events vary slightly depending on instruction
* combination and cache misses, etc. Attempt to determine the
* minimum times. These can be subtracted from the profiling times
* without much risk of reducing the profiling times below what they
* would be when profiling is not configured. Abbreviate:
* ab = minimum time between MC1 and MC3
* a = minimum time between MC1 and MC2
* b = minimum time between MC2 and MC3
* cd = minimum time between ME1 and ME3
* c = minimum time between ME1 and ME2
* d = minimum time between ME2 and ME3.
* These satisfy the relations:
* ab <= mcount_overhead (just measured)
* a + b <= ab
* cd <= mexitcount_overhead (just measured)
* c + d <= cd
* a + d <= nullfunc_loop_profiled_time (just measured)
* a >= 0, b >= 0, c >= 0, d >= 0.
* Assume that ab and cd are equal to the minimums.
*/
p->cputime_overhead = CALIB_DOSCALE(cputime_overhead);
p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead);
p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead
- cputime_overhead);
nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time;
p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead
- nullfunc_loop_overhead)
/ 4);
p->mexitcount_pre_overhead = p->mexitcount_overhead
+ p->cputime_overhead
- p->mexitcount_post_overhead;
p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead)
- p->mexitcount_post_overhead;
p->mcount_post_overhead = p->mcount_overhead
+ p->cputime_overhead
- p->mcount_pre_overhead;
printf(
"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n",
c2n(p->cputime_overhead, p->profrate),
c2n(p->mcount_overhead, p->profrate),
c2n(p->mcount_pre_overhead, p->profrate),
c2n(p->mcount_post_overhead, p->profrate),
c2n(p->cputime_overhead, p->profrate),
c2n(p->mexitcount_overhead, p->profrate),
c2n(p->mexitcount_pre_overhead, p->profrate),
c2n(p->mexitcount_post_overhead, p->profrate));
printf(
"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n",
p->cputime_overhead, p->mcount_overhead,
p->mcount_pre_overhead, p->mcount_post_overhead,
p->cputime_overhead, p->mexitcount_overhead,
p->mexitcount_pre_overhead, p->mexitcount_post_overhead);
#endif /* GUPROF */
}
@ -189,16 +297,20 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS
if (!req->newptr)
return (0);
if (state == GMON_PROF_OFF) {
gp->state = state;
stopprofclock(&proc0);
gp->state = state;
stopguprof(gp);
} else if (state == GMON_PROF_ON) {
gp->state = GMON_PROF_OFF;
stopguprof(gp);
gp->profrate = profhz;
gp->state = state;
startprofclock(&proc0);
gp->state = state;
#ifdef GUPROF
} else if (state == GMON_PROF_HIRES) {
gp->profrate = 1193182; /* XXX */
gp->state = GMON_PROF_OFF;
stopprofclock(&proc0);
startguprof(gp);
gp->state = state;
#endif
} else if (state != gp->state)

View File

@ -36,13 +36,12 @@
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#endif
static const char rcsid[] =
"$Id: mcount.c,v 1.7 1996/05/02 14:20:33 phk Exp $";
"$Id: mcount.c,v 1.8 1996/08/28 20:15:12 bde Exp $";
#endif
#include <sys/param.h>
#include <sys/gmon.h>
#ifdef KERNEL
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
@ -71,7 +70,7 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
register fptrint_t frompc, selfpc;
{
#ifdef GUPROF
u_int delta;
int delta;
#endif
register fptrdiff_t frompci;
register u_short *frompcindex;
@ -115,50 +114,33 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
#endif /* KERNEL */
#ifdef GUPROF
if (p->state != GMON_PROF_HIRES)
goto skip_guprof_stuff;
/*
* Look at the clock and add the count of clock cycles since the
* clock was last looked at to a counter for frompc. This
* solidifies the count for the function containing frompc and
* effectively starts another clock for the current function.
* The count for the new clock will be solidified when another
* function call is made or the function returns.
*
* We use the usual sampling counters since they can be located
* efficiently. 4-byte counters are usually necessary.
*
* There are many complications for subtracting the profiling
* overheads from the counts for normal functions and adding
* them to the counts for mcount(), mexitcount() and cputime().
* We attempt to handle fractional cycles, but the overheads
* are usually underestimated because they are calibrated for
* a simpler than usual setup.
*/
delta = cputime() - p->mcount_overhead;
p->cputime_overhead_resid += p->cputime_overhead_frac;
p->mcount_overhead_resid += p->mcount_overhead_frac;
if ((int)delta < 0)
*p->mcount_count += delta + p->mcount_overhead
- p->cputime_overhead;
else if (delta != 0) {
if (p->cputime_overhead_resid >= CALIB_SCALE) {
p->cputime_overhead_resid -= CALIB_SCALE;
++*p->cputime_count;
--delta;
}
if (delta != 0) {
if (p->mcount_overhead_resid >= CALIB_SCALE) {
p->mcount_overhead_resid -= CALIB_SCALE;
++*p->mcount_count;
--delta;
}
KCOUNT(p, frompci) += delta;
}
*p->mcount_count += p->mcount_overhead_sub;
if (p->state == GMON_PROF_HIRES) {
/*
* Count the time since cputime() was previously called
* against `frompc'. Compensate for overheads.
*
* cputime() sets its prev_count variable to the count when
* it is called. This in effect starts a counter for
* the next period of execution (normally from now until
* the next call to mcount() or mexitcount()). We set
* cputime_bias to compensate for our own overhead.
*
* We use the usual sampling counters since they can be
* located efficiently. 4-byte counters are usually
* necessary. gprof will add up the scattered counts
* just like it does for statistical profiling. All
* counts are signed so that underflow in the subtractions
* doesn't matter much (negative counts are normally
* compensated for by larger counts elsewhere). Underflow
* shouldn't occur, but may be caused by slightly wrong
* calibrations or from not clearing cputime_bias.
*/
delta = cputime() - cputime_bias - p->mcount_pre_overhead;
cputime_bias = p->mcount_post_overhead;
KCOUNT(p, frompci) += delta;
*p->cputime_count += p->cputime_overhead;
*p->mcount_count += p->mcount_overhead;
}
*p->cputime_count += p->cputime_overhead;
skip_guprof_stuff:
#endif /* GUPROF */
#ifdef KERNEL
@ -290,36 +272,40 @@ mexitcount(selfpc)
p = &_gmonparam;
selfpcdiff = selfpc - (fptrint_t)p->lowpc;
if (selfpcdiff < p->textsize) {
u_int delta;
int delta;
/*
* Solidify the count for the current function.
* Count the time since cputime() was previously called
* against `selfpc'. Compensate for overheads.
*/
delta = cputime() - p->mexitcount_overhead;
p->cputime_overhead_resid += p->cputime_overhead_frac;
p->mexitcount_overhead_resid += p->mexitcount_overhead_frac;
if ((int)delta < 0)
*p->mexitcount_count += delta + p->mexitcount_overhead
- p->cputime_overhead;
else if (delta != 0) {
if (p->cputime_overhead_resid >= CALIB_SCALE) {
p->cputime_overhead_resid -= CALIB_SCALE;
++*p->cputime_count;
--delta;
}
if (delta != 0) {
if (p->mexitcount_overhead_resid
>= CALIB_SCALE) {
p->mexitcount_overhead_resid
-= CALIB_SCALE;
++*p->mexitcount_count;
--delta;
}
KCOUNT(p, selfpcdiff) += delta;
}
*p->mexitcount_count += p->mexitcount_overhead_sub;
}
delta = cputime() - cputime_bias - p->mexitcount_pre_overhead;
cputime_bias = p->mexitcount_post_overhead;
KCOUNT(p, selfpcdiff) += delta;
*p->cputime_count += p->cputime_overhead;
*p->mexitcount_count += p->mexitcount_overhead;
}
}
/*
 * Spin through CALIB_SCALE iterations doing nothing.
 *
 * kmstartup() times one run of this with cputime() to obtain
 * empty_loop_time, which it then subtracts from the other calibration
 * measurements.
 *
 * NOTE(review): relies on the compiler not discarding the empty loop --
 * confirm against the build flags.
 */
void
empty_loop()
{
int i;
for (i = 0; i < CALIB_SCALE; i++)
;
}
/*
 * Deliberately empty function; the call target of nullfunc_loop().
 */
void
nullfunc()
{
}
/*
 * Call nullfunc() CALIB_SCALE times.
 *
 * No caller is visible in this excerpt; presumably this is the counterpart
 * of nullfunc_loop_profiled() for code built without profiling -- TODO
 * confirm.
 */
void
nullfunc_loop()
{
int i;
for (i = 0; i < CALIB_SCALE; i++)
nullfunc();
}
#endif /* GUPROF */

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
* $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $
* $Id: gmon.h,v 1.8 1995/12/29 15:29:26 bde Exp $
*/
#ifndef _SYS_GMON_H_
@ -57,7 +57,7 @@ struct gmonhdr {
* Type of histogram counters used in the kernel.
*/
#ifdef GPROF4
#define HISTCOUNTER unsigned
#define HISTCOUNTER int
#else
#define HISTCOUNTER unsigned short
#endif
@ -174,22 +174,17 @@ struct gmonparam {
fptrint_t highpc;
u_long textsize;
u_long hashfraction;
u_long profrate;
int profrate; /* XXX wrong type to match gmonhdr */
HISTCOUNTER *cputime_count;
u_int cputime_overhead;
u_int cputime_overhead_frac;
u_int cputime_overhead_resid;
u_int cputime_overhead_sub;
int cputime_overhead;
HISTCOUNTER *mcount_count;
u_int mcount_overhead;
u_int mcount_overhead_frac;
u_int mcount_overhead_resid;
u_int mcount_overhead_sub;
int mcount_overhead;
int mcount_post_overhead;
int mcount_pre_overhead;
HISTCOUNTER *mexitcount_count;
u_int mexitcount_overhead;
u_int mexitcount_overhead_frac;
u_int mexitcount_overhead_resid;
u_int mexitcount_overhead_sub;
int mexitcount_overhead;
int mexitcount_post_overhead;
int mexitcount_pre_overhead;
};
extern struct gmonparam _gmonparam;