Rearrange the inner loop of scanc() to get better code on i*86's

(on an i486, 10 cycles (+ cache misses) instead of 15). The change should be a no-op if the compiler is any good. The best possible i*86 code for the same algorithm is only 1 cycle faster on i486s, so I don't want to bother implementing an assembler version. scanc() is a bottleneck for OPOST processing. It is naturally about 4 times as slow as bcopy() on 32-bit systems.
svn path=/head/; revision=9485
1995-07-11 18:50:47 +00:00 · 1995-07-11 18:50:47 +00:00 · 5182dcbec2 · 2020-12-20 02:59:44 +00:00
commit 5182dcbec2
parent d800e06858
1 changed files with 10 additions and 2 deletions
--- a/sys/libkern/scanc.c
+++ b/sys/libkern/scanc.c
@@ -32,7 +32,7 @@
 *
 *	@(#)scanc.c	8.1 (Berkeley) 6/10/93
 *
- * $Id: scanc.c,v 1.2 1994/08/02 07:44:27 davidg Exp $
+ * $Id: scanc.c,v 1.3 1995/03/17 06:15:39 phk Exp $
 */

 #include <sys/libkern.h>
@@ -47,6 +47,14 @@ scanc(size, cp, table, mask0)
 	register u_char mask;

 	mask = mask0;
-	for (end = &cp[size]; cp < end && (table[*cp] & mask) == 0; ++cp);
+	for (end = &cp[size]; cp < end; ++cp) {
+		/*
+		 * gcc-2.6.3 generates poor (un)sign extension code on i386's.
+		 * The cast to volatile should have no effect, but in fact it
+		 * improves the code on i386's.
+		 */
+		if (table[*(volatile u_char *)cp] & mask)
+			break;
+	}
 	return (end - cp);
 }