dtrace_gethrtime: improve scaling of TSC ticks to nanoseconds

Currently dtrace_gethrtime uses a formula similar to the following for
converting TSC ticks to nanoseconds:
    rdtsc() * 10^9 / tsc_freq
The dividend overflows the 64-bit type and wraps around every
2^64/10^9 = 18446744073 ticks, which is just a few seconds on modern
machines.
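For a sense of scale, here is a small stand-alone user-space sketch (not
part of the commit; the 3 GHz figure is an arbitrary example frequency)
showing how soon the product wraps:

#include <stdint.h>
#include <stdio.h>

/*
 * With the old formula the intermediate product rdtsc() * 10^9 exceeds
 * UINT64_MAX once the TSC count passes 2^64 / 10^9 ticks.
 */
int
main(void)
{
	const uint64_t NANOSEC = 1000000000ULL;
	const uint64_t tsc_freq = 3000000000ULL;	/* example: 3 GHz */
	uint64_t max_ticks = UINT64_MAX / NANOSEC;	/* 18446744073 ticks */

	printf("product overflows after %ju ticks, i.e. about %.1f seconds\n",
	    (uintmax_t)max_ticks, (double)max_ticks / tsc_freq);
	return (0);
}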

Now we instead use a precalculated scaling factor
    10^9 * 2^N / tsc_freq < 2^32
and multiply each 32-bit half of the TSC value by it separately, which
avoids the overflow of the dividend described above.
The idea is taken from OpenSolaris.
This has the added benefit that the TSC is always scaled by an invariant
value regardless of TSC frequency changes.  Thus the timestamps will not
be accurate if the TSC frequency actually changes, but they remain
proportional to TSC ticks and thus monotonic.  This should be much
better than the current formula, which produces wildly different,
non-monotonic results when tsc_freq changes.
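As a sketch of the approach (a user-space illustration with hypothetical
names, not the committed kernel code; SCALE_SHIFT = 28 matches the value
chosen in the diff below), splitting the 64-bit tick count into 32-bit
halves keeps every intermediate product within 64 bits:

#include <stdint.h>

#define SCALE_SHIFT	28

/*
 * scale is the precalculated value 10^9 * 2^SCALE_SHIFT / tsc_freq,
 * which fits in 32 bits for tsc_freq above 62.5 MHz.  Then
 *   ticks * 10^9 / tsc_freq  ~=  (ticks * scale) >> SCALE_SHIFT
 * and, writing ticks = hi * 2^32 + lo, that equals
 *   ((lo * scale) >> SCALE_SHIFT) + ((hi * scale) << (32 - SCALE_SHIFT)),
 * where each partial product multiplies two values of at most 32 bits.
 */
static uint64_t
ticks_to_ns(uint64_t ticks, uint64_t scale)
{
	uint32_t lo = (uint32_t)ticks;
	uint32_t hi = (uint32_t)(ticks >> 32);

	return (((lo * scale) >> SCALE_SHIFT) +
	    ((hi * scale) << (32 - SCALE_SHIFT)));
}

The same expression appears in the new dtrace_gethrtime() bodies in the
hunks below.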

Also drop the write-only 'cp' variable from the amd64
dtrace_gethrtime_init() to make it identical to its i386 twin.

PR:		kern/127441
Tested by:	Thomas Backman <serenity@exscape.org>
Reviewed by:	jhb
Discussed with:	current@, bde, gnn
Silence from:	jb
Approved by:	re (gnn)
MFC after:	1 week
commit b898b874c6 (parent 9990f66d44)
Author: avg
Date:   2009-07-15 17:07:39 +00:00

2 changed files with 95 additions and 4 deletions


@@ -366,6 +366,10 @@ dtrace_safe_defer_signal(void)
static int64_t tgt_cpu_tsc;
static int64_t hst_cpu_tsc;
static int64_t tsc_skew[MAXCPU];
static uint64_t nsec_scale;
/* See below for the explanation of this macro. */
#define SCALE_SHIFT 28
static void
dtrace_gethrtime_init_sync(void *arg)
@@ -401,9 +405,36 @@ dtrace_gethrtime_init_cpu(void *arg)
static void
dtrace_gethrtime_init(void *arg)
{
uint64_t tsc_f;
cpumask_t map;
int i;
- struct pcpu *cp;
/*
* Get TSC frequency known at this moment.
* This should be constant if TSC is invariant.
* Otherwise tick->time conversion will be inaccurate, but
* will preserve monotonic property of TSC.
*/
tsc_f = tsc_freq;
/*
* The following line checks that nsec_scale calculated below
* doesn't overflow 32-bit unsigned integer, so that it can multiply
* another 32-bit integer without overflowing 64-bit.
* Thus minimum supported TSC frequency is 62.5MHz.
*/
KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low"));
/*
* We scale up NANOSEC/tsc_f ratio to preserve as much precision
* as possible.
* 2^28 factor was chosen quite arbitrarily from practical
* considerations:
* - it supports TSC frequencies as low as 62.5MHz (see above);
* - it provides quite good precision (e < 0.01%) up to THz
* (terahertz) values;
*/
nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
/* The current CPU is the reference one. */
tsc_skew[curcpu] = 0;
@@ -412,7 +443,7 @@ dtrace_gethrtime_init(void *arg)
if (i == curcpu)
continue;
- if ((cp = pcpu_find(i)) == NULL)
+ if (pcpu_find(i) == NULL)
continue;
map = 0;
@@ -439,7 +470,21 @@ SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
uint64_t
dtrace_gethrtime()
{
- return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq);
uint64_t tsc;
uint32_t lo;
uint32_t hi;
/*
* We split TSC value into lower and higher 32-bit halves and separately
* scale them with nsec_scale, then we scale them down by 2^28
* (see nsec_scale calculations) taking into account 32-bit shift of
* the higher half and finally add.
*/
tsc = rdtsc() + tsc_skew[curcpu];
lo = tsc;
hi = tsc >> 32;
return (((lo * nsec_scale) >> SCALE_SHIFT) +
((hi * nsec_scale) << (32 - SCALE_SHIFT)));
}
uint64_t

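To make the two numeric claims in the comments above concrete (the
62.5 MHz lower bound on tsc_freq and the sub-0.01% error up to THz
frequencies), here is a small user-space check; it is not part of the
commit, and the frequencies are arbitrary test values:

#include <stdint.h>
#include <stdio.h>

#define NANOSEC		1000000000ULL
#define SCALE_SHIFT	28

/*
 * nsec_scale = NANOSEC * 2^SCALE_SHIFT / tsc_f must fit in 32 bits,
 * which requires tsc_f > NANOSEC / 2^(32 - SCALE_SHIFT) = 62.5 MHz.
 * Truncating the division loses less than one unit of nsec_scale,
 * so the relative error stays below 1 / nsec_scale.
 */
int
main(void)
{
	/* Arbitrary example frequencies, in Hz. */
	uint64_t freqs[] = { 62500001ULL, 2400000000ULL, 1000000000000ULL };

	for (size_t i = 0; i < sizeof(freqs) / sizeof(freqs[0]); i++) {
		uint64_t tsc_f = freqs[i];
		uint64_t nsec_scale = (NANOSEC << SCALE_SHIFT) / tsc_f;

		printf("tsc_f = %13ju Hz  nsec_scale = %10ju  "
		    "max relative error = %.6f%%\n",
		    (uintmax_t)tsc_f, (uintmax_t)nsec_scale,
		    100.0 / (double)nsec_scale);
	}
	return (0);
}

Even at 1 THz the truncation of nsec_scale keeps the relative error
below 0.0004%, comfortably within the 0.01% bound mentioned in the
comment.  The second diff below is the identical change applied to the
i386 version of the file.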

@@ -366,6 +366,10 @@ dtrace_safe_defer_signal(void)
static int64_t tgt_cpu_tsc;
static int64_t hst_cpu_tsc;
static int64_t tsc_skew[MAXCPU];
static uint64_t nsec_scale;
/* See below for the explanation of this macro. */
#define SCALE_SHIFT 28
static void
dtrace_gethrtime_init_sync(void *arg)
@@ -401,9 +405,37 @@ dtrace_gethrtime_init_cpu(void *arg)
static void
dtrace_gethrtime_init(void *arg)
{
uint64_t tsc_f;
cpumask_t map;
int i;
/*
* Get TSC frequency known at this moment.
* This should be constant if TSC is invariant.
* Otherwise tick->time conversion will be inaccurate, but
* will preserve monotonic property of TSC.
*/
tsc_f = tsc_freq;
/*
* The following line checks that nsec_scale calculated below
* doesn't overflow 32-bit unsigned integer, so that it can multiply
* another 32-bit integer without overflowing 64-bit.
* Thus minimum supported TSC frequency is 62.5MHz.
*/
KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low"));
/*
* We scale up NANOSEC/tsc_f ratio to preserve as much precision
* as possible.
* 2^28 factor was chosen quite arbitrarily from practical
* considerations:
* - it supports TSC frequencies as low as 62.5MHz (see above);
* - it provides quite good precision (e < 0.01%) up to THz
* (terahertz) values;
*/
nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
/* The current CPU is the reference one. */
tsc_skew[curcpu] = 0;
@@ -438,7 +470,21 @@ SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
uint64_t
dtrace_gethrtime()
{
- return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq);
uint64_t tsc;
uint32_t lo;
uint32_t hi;
/*
* We split TSC value into lower and higher 32-bit halves and separately
* scale them with nsec_scale, then we scale them down by 2^28
* (see nsec_scale calculations) taking into account 32-bit shift of
* the higher half and finally add.
*/
tsc = rdtsc() + tsc_skew[curcpu];
lo = tsc;
hi = tsc >> 32;
return (((lo * nsec_scale) >> SCALE_SHIFT) +
((hi * nsec_scale) << (32 - SCALE_SHIFT)));
}
uint64_t
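Finally, the recombination in the new dtrace_gethrtime() can be
sanity-checked against a full-precision computation.  This is a
user-space sketch with hypothetical helper names; it relies on the
non-standard unsigned __int128 extension available in GCC and Clang on
64-bit targets:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define NANOSEC		1000000000ULL
#define SCALE_SHIFT	28

/* Split-multiply scaling, mirroring the new dtrace_gethrtime(). */
static uint64_t
scale_split(uint64_t tsc, uint64_t nsec_scale)
{
	uint32_t lo = (uint32_t)tsc;
	uint32_t hi = (uint32_t)(tsc >> 32);

	return (((lo * nsec_scale) >> SCALE_SHIFT) +
	    ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
}

/* Full-precision reference using the 128-bit compiler extension. */
static uint64_t
scale_ref(uint64_t tsc, uint64_t nsec_scale)
{
	return ((uint64_t)(((unsigned __int128)tsc * nsec_scale) >>
	    SCALE_SHIFT));
}

int
main(void)
{
	uint64_t tsc_f = 2400000000ULL;		/* arbitrary example: 2.4 GHz */
	uint64_t nsec_scale = (NANOSEC << SCALE_SHIFT) / tsc_f;
	/* A few sample TSC readings, including values above 2^32. */
	uint64_t samples[] = { 0, 1, 0xffffffffULL, 0x123456789abcULL,
	    tsc_f * 60 * 60 * 24 };		/* about one day of ticks */

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(scale_split(samples[i], nsec_scale) ==
		    scale_ref(samples[i], nsec_scale));
	return (0);
}

The two agree exactly because 2^32 is a multiple of 2^SCALE_SHIFT, so no
bits are lost when the high half's partial product is shifted left
instead of being shifted right along with the rest of the product.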