Estimate an initial overhead of 0 usec instead of 20 usec in DELAY().

I have code that calibrates the overhead fairly accurately, but there is little point in using it, since it is most accurate on machines where an estimate of 0 already works well. On slow machines, the accuracy of DELAY() varies widely because it is limited by the resolution of getit(), even if the initial delay is calibrated perfectly. Use fixed-point arithmetic and long long arithmetic to speed up scaling in DELAY(). The old method slowed down a lot when the timer frequency became a variable. Assume the default frequency for short delays so that the fixed-point calculation can be exact. Fast scaling is only important for small delays. Scaling is done after reading the counter and outside the loop, so it doesn't decrease accuracy or resolution, provided it completes before the delay is up. The comment in the code still describes this incorrectly.
1997-01-29 22:51:44 +00:00 · 1997-01-29 22:51:44 +00:00 · 49a116737e
commit 49a116737e
parent 89972e50e4
5 changed files with 105 additions and 50 deletions
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;

 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 (about 39098.19) rounded up.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;

 	while (ticks_left > 0) {
 		tick = getit();
--- a/sys/amd64/isa/clock.c
+++ b/sys/amd64/isa/clock.c
@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;

 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 (about 39098.19) rounded up.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;

 	while (ticks_left > 0) {
 		tick = getit();
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;

 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 (about 39098.19) rounded up.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;

 	while (ticks_left > 0) {
 		tick = getit();
--- a/sys/i386/isa/clock.c
+++ b/sys/i386/isa/clock.c
@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;

 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 (about 39098.19) rounded up.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;

 	while (ticks_left > 0) {
 		tick = getit();
--- a/sys/isa/atrtc.c
+++ b/sys/isa/atrtc.c
@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;

 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 (about 39098.19) rounded up.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;

 	while (ticks_left > 0) {
 		tick = getit();