freebsd-nq/sys/i386/include/profile.h

167 lines
5.1 KiB
C
Raw Normal View History

1994-05-25 01:34:38 +00:00
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
1999-08-28 01:08:13 +00:00
* $FreeBSD$
1994-05-25 01:34:38 +00:00
*/
#ifndef _MACHINE_PROFILE_H_
#define _MACHINE_PROFILE_H_
#ifdef _KERNEL
/*
* Config generates something to tell the compiler to align functions on 16
* byte boundaries. A strict alignment is good for keeping the tables small.
*/
#define FUNCTION_ALIGNMENT 16
/*
* The kernel uses assembler stubs instead of unportable inlines.
* This is mainly to save a little time when profiling is not enabled,
* which is the usual case for the kernel.
*/
#define _MCOUNT_DECL void mcount
#define MCOUNT
#ifdef GUPROF
#define MCOUNT_DECL(s)
#define MCOUNT_ENTER(s)
#define MCOUNT_EXIT(s)
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define MCOUNT_OVERHEAD(label) \
__asm __volatile("pushl %0; call __mcount; popl %%ecx" \
: \
: "i" (profil) \
: "ax", "dx", "cx", "memory")
#define MEXITCOUNT_OVERHEAD() \
__asm __volatile("call .mexitcount; 1:" \
: : \
: "ax", "dx", "cx", "memory")
#define MEXITCOUNT_OVERHEAD_GETLABEL(labelp) \
__asm __volatile("movl $1b,%0" : "=rm" (labelp))
#elif defined(lint)
#define MCOUNT_OVERHEAD(label)
#define MEXITCOUNT_OVERHEAD()
#define MEXITCOUNT_OVERHEAD_GETLABEL()
#else
#error
#endif /* !(__GNUC__ || __INTEL_COMPILER) */
#else /* !GUPROF */
#define MCOUNT_DECL(s) u_long s;
#ifdef SMP
extern int mcount_lock;
#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); \
while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \
/* nothing */ ; }
#define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \
write_eflags(s); }
#else
#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); }
#define MCOUNT_EXIT(s) (write_eflags(s))
#endif
#endif /* GUPROF */
void bintr(void);
void btrap(void);
void eintr(void);
void user(void);
#define MCOUNT_FROMPC_USER(pc) \
((pc < (uintfptr_t)VM_MAXUSER_ADDRESS) ? (uintfptr_t)user : pc)
#define MCOUNT_FROMPC_INTR(pc) \
((pc >= (uintfptr_t)btrap && pc < (uintfptr_t)eintr) ? \
((pc >= (uintfptr_t)bintr) ? (uintfptr_t)bintr : \
(uintfptr_t)btrap) : ~0U)
#else /* !_KERNEL */
#define FUNCTION_ALIGNMENT 4
#define _MCOUNT_DECL static __inline void _mcount
1994-05-25 01:34:38 +00:00
These are changes to allow to use the Intel C/C++ compiler (lang/icc) to build the kernel. It doesn't affect the operation if gcc. Most of the changes are just adding __INTEL_COMPILER to #ifdef's, as icc v8 may define __GNUC__ some parts may look strange but are necessary. Additional changes: - in_cksum.[ch]: * use a generic C version instead of the assembly version in the !gcc case (ASM code breaks with the optimizations icc does) -> no bad checksums with an icc compiled kernel Help from: andre, grehan, das Stolen from: alpha version via ppc version The entire checksum code should IMHO be replaced with the DragonFly version (because it isn't guaranteed future revisions of gcc will include similar optimizations) as in: ---snip--- Revision Changes Path 1.12 +1 -0 src/sys/conf/files.i386 1.4 +142 -558 src/sys/i386/i386/in_cksum.c 1.5 +33 -69 src/sys/i386/include/in_cksum.h 1.5 +2 -0 src/sys/netinet/igmp.c 1.6 +0 -1 src/sys/netinet/in.h 1.6 +2 -0 src/sys/netinet/ip_icmp.c 1.4 +3 -4 src/contrib/ipfilter/ip_compat.h 1.3 +1 -2 src/sbin/natd/icmp.c 1.4 +0 -1 src/sbin/natd/natd.c 1.48 +1 -0 src/sys/conf/files 1.2 +0 -1 src/sys/conf/files.amd64 1.13 +0 -1 src/sys/conf/files.i386 1.5 +0 -1 src/sys/conf/files.pc98 1.7 +1 -1 src/sys/contrib/ipfilter/netinet/fil.c 1.10 +2 -3 src/sys/contrib/ipfilter/netinet/ip_compat.h 1.10 +1 -1 src/sys/contrib/ipfilter/netinet/ip_fil.c 1.7 +1 -1 src/sys/dev/netif/txp/if_txp.c 1.7 +1 -1 src/sys/net/ip_mroute/ip_mroute.c 1.7 +1 -2 src/sys/net/ipfw/ip_fw2.c 1.6 +1 -2 src/sys/netinet/igmp.c 1.4 +158 -116 src/sys/netinet/in_cksum.c 1.6 +1 -1 src/sys/netinet/ip_gre.c 1.7 +1 -2 src/sys/netinet/ip_icmp.c 1.10 +1 -1 src/sys/netinet/ip_input.c 1.10 +1 -2 src/sys/netinet/ip_output.c 1.13 +1 -2 src/sys/netinet/tcp_input.c 1.9 +1 -2 src/sys/netinet/tcp_output.c 1.10 +1 -1 src/sys/netinet/tcp_subr.c 1.10 +1 -1 src/sys/netinet/tcp_syncache.c 1.9 +1 -2 src/sys/netinet/udp_usrreq.c 1.5 +1 -2 src/sys/netinet6/ipsec.c 1.5 +1 -2 src/sys/netproto/ipsec/ipsec.c 1.5 +1 -1 src/sys/netproto/ipsec/ipsec_input.c 1.4 +1 -2 src/sys/netproto/ipsec/ipsec_output.c and finally remove sys/i386/i386 in_cksum.c sys/i386/include in_cksum.h ---snip--- - endian.h: * DTRT in C++ mode - quad.h: * we don't use gcc v1 anymore, remove support for it Suggested by: bde (long ago) - assym.h: * avoid zero-length arrays (remove dependency on a gcc specific feature) This change changes the contents of the object file, but as it's only used to generate some values for a header, and the generator knows how to handle this, there's no impact in the gcc case. Explained by: bde Submitted by: Marius Strobl <marius@alchemy.franken.de> - aicasm.c: * minor change to teach it about the way icc spells "-nostdinc" Not approved by: gibbs (no reply to my mail) - bump __FreeBSD_version (lang/icc needs to know about the changes) Incarnations of this patch survive gcc compiles since a loooong time, I use it on my desktop. An icc compiled kernel works since Nov. 2003 (exceptions: snd_* if used as modules), it survives a build of the entire ports collection with icc. Parts of this commit contains suggestions or submissions from Marius Strobl <marius@alchemy.franken.de>. Reviewed by: -arch Submitted by: netchild
2004-03-12 21:45:33 +00:00
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define MCOUNT \
void \
mcount() \
{ \
uintfptr_t selfpc, frompc; \
/* \
* Find the return address for mcount, \
* and the return address for mcount's caller. \
* \
* selfpc = pc pushed by call to mcount \
*/ \
2003-06-02 00:29:35 +00:00
__asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \
/* \
* frompc = pc pushed by call to mcount's caller. \
* The caller's stack frame has already been built, so %ebp is \
* the caller's frame pointer. The caller's raddr is in the \
* caller's frame following the caller's caller's frame pointer.\
*/ \
__asm("movl (%%ebp),%0" : "=r" (frompc)); \
frompc = ((uintfptr_t *)frompc)[1]; \
_mcount(frompc, selfpc); \
1994-05-25 01:34:38 +00:00
}
#else /* !(__GNUC__ || __INTEL_COMPILER) */
void \
#define MCOUNT \
mcount() \
{ \
}
#endif /* __GNUC__ || __INTEL_COMPILER */
typedef u_int uintfptr_t;
#endif /* _KERNEL */
/*
* An unsigned integral type that can hold non-negative difference between
* function pointers.
*/
typedef u_int fptrdiff_t;
#ifdef _KERNEL
Improved non-statistical (GUPROF) profiling: - use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: permon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
1996-10-17 19:32:31 +00:00
2002-03-20 05:48:58 +00:00
void mcount(uintfptr_t frompc, uintfptr_t selfpc);
Improved non-statistical (GUPROF) profiling: - use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: permon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
1996-10-17 19:32:31 +00:00
#else /* !_KERNEL */
Improved non-statistical (GUPROF) profiling: - use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: permon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
1996-10-17 19:32:31 +00:00
#include <sys/cdefs.h>
__BEGIN_DECLS
These are changes to allow to use the Intel C/C++ compiler (lang/icc) to build the kernel. It doesn't affect the operation if gcc. Most of the changes are just adding __INTEL_COMPILER to #ifdef's, as icc v8 may define __GNUC__ some parts may look strange but are necessary. Additional changes: - in_cksum.[ch]: * use a generic C version instead of the assembly version in the !gcc case (ASM code breaks with the optimizations icc does) -> no bad checksums with an icc compiled kernel Help from: andre, grehan, das Stolen from: alpha version via ppc version The entire checksum code should IMHO be replaced with the DragonFly version (because it isn't guaranteed future revisions of gcc will include similar optimizations) as in: ---snip--- Revision Changes Path 1.12 +1 -0 src/sys/conf/files.i386 1.4 +142 -558 src/sys/i386/i386/in_cksum.c 1.5 +33 -69 src/sys/i386/include/in_cksum.h 1.5 +2 -0 src/sys/netinet/igmp.c 1.6 +0 -1 src/sys/netinet/in.h 1.6 +2 -0 src/sys/netinet/ip_icmp.c 1.4 +3 -4 src/contrib/ipfilter/ip_compat.h 1.3 +1 -2 src/sbin/natd/icmp.c 1.4 +0 -1 src/sbin/natd/natd.c 1.48 +1 -0 src/sys/conf/files 1.2 +0 -1 src/sys/conf/files.amd64 1.13 +0 -1 src/sys/conf/files.i386 1.5 +0 -1 src/sys/conf/files.pc98 1.7 +1 -1 src/sys/contrib/ipfilter/netinet/fil.c 1.10 +2 -3 src/sys/contrib/ipfilter/netinet/ip_compat.h 1.10 +1 -1 src/sys/contrib/ipfilter/netinet/ip_fil.c 1.7 +1 -1 src/sys/dev/netif/txp/if_txp.c 1.7 +1 -1 src/sys/net/ip_mroute/ip_mroute.c 1.7 +1 -2 src/sys/net/ipfw/ip_fw2.c 1.6 +1 -2 src/sys/netinet/igmp.c 1.4 +158 -116 src/sys/netinet/in_cksum.c 1.6 +1 -1 src/sys/netinet/ip_gre.c 1.7 +1 -2 src/sys/netinet/ip_icmp.c 1.10 +1 -1 src/sys/netinet/ip_input.c 1.10 +1 -2 src/sys/netinet/ip_output.c 1.13 +1 -2 src/sys/netinet/tcp_input.c 1.9 +1 -2 src/sys/netinet/tcp_output.c 1.10 +1 -1 src/sys/netinet/tcp_subr.c 1.10 +1 -1 src/sys/netinet/tcp_syncache.c 1.9 +1 -2 src/sys/netinet/udp_usrreq.c 1.5 +1 -2 src/sys/netinet6/ipsec.c 1.5 +1 -2 src/sys/netproto/ipsec/ipsec.c 1.5 +1 -1 src/sys/netproto/ipsec/ipsec_input.c 1.4 +1 -2 src/sys/netproto/ipsec/ipsec_output.c and finally remove sys/i386/i386 in_cksum.c sys/i386/include in_cksum.h ---snip--- - endian.h: * DTRT in C++ mode - quad.h: * we don't use gcc v1 anymore, remove support for it Suggested by: bde (long ago) - assym.h: * avoid zero-length arrays (remove dependency on a gcc specific feature) This change changes the contents of the object file, but as it's only used to generate some values for a header, and the generator knows how to handle this, there's no impact in the gcc case. Explained by: bde Submitted by: Marius Strobl <marius@alchemy.franken.de> - aicasm.c: * minor change to teach it about the way icc spells "-nostdinc" Not approved by: gibbs (no reply to my mail) - bump __FreeBSD_version (lang/icc needs to know about the changes) Incarnations of this patch survive gcc compiles since a loooong time, I use it on my desktop. An icc compiled kernel works since Nov. 2003 (exceptions: snd_* if used as modules), it survives a build of the entire ports collection with icc. Parts of this commit contains suggestions or submissions from Marius Strobl <marius@alchemy.franken.de>. Reviewed by: -arch Submitted by: netchild
2004-03-12 21:45:33 +00:00
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
2002-03-20 05:48:58 +00:00
void mcount(void) __asm(".mcount");
#endif
Improved non-statistical (GUPROF) profiling: - use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: permon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
1996-10-17 19:32:31 +00:00
__END_DECLS
#endif /* _KERNEL */
#endif /* !_MACHINE_PROFILE_H_ */