Optimize bzero() by unrolling the aligned quadword loop and moving the length decrement away from the loop's length comparison.
Measurements on beast.cdrom.com show a more than 3x improvement over the original code for large block sizes, putting its performance on par with the optimized assembly implementation in libc.
This commit is contained in:
parent
e0b7bc571a
commit
9184fb847b
@ -23,7 +23,7 @@
|
|||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
* SUCH DAMAGE.
|
* SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
* $Id: machdep.c,v 1.18 1998/10/15 22:00:54 dfr Exp $
|
* $Id: machdep.c,v 1.19 1998/10/30 05:41:07 msmith Exp $
|
||||||
*/
|
*/
|
||||||
/*-
|
/*-
|
||||||
* Copyright (c) 1998 The NetBSD Foundation, Inc.
|
* Copyright (c) 1998 The NetBSD Foundation, Inc.
|
||||||
@ -1029,10 +1029,22 @@ bzero(void *buf, size_t len)
|
|||||||
*p++ = 0;
|
*p++ = 0;
|
||||||
len--;
|
len--;
|
||||||
}
|
}
|
||||||
|
while (len >= sizeof(u_long) * 8) {
|
||||||
|
*(u_long*) p = 0;
|
||||||
|
*((u_long*) p + 1) = 0;
|
||||||
|
*((u_long*) p + 2) = 0;
|
||||||
|
*((u_long*) p + 3) = 0;
|
||||||
|
len -= sizeof(u_long) * 8;
|
||||||
|
*((u_long*) p + 4) = 0;
|
||||||
|
*((u_long*) p + 5) = 0;
|
||||||
|
*((u_long*) p + 6) = 0;
|
||||||
|
*((u_long*) p + 7) = 0;
|
||||||
|
p += sizeof(u_long) * 8;
|
||||||
|
}
|
||||||
while (len >= sizeof(u_long)) {
|
while (len >= sizeof(u_long)) {
|
||||||
*(u_long*) p = 0;
|
*(u_long*) p = 0;
|
||||||
p += sizeof(u_long);
|
|
||||||
len -= sizeof(u_long);
|
len -= sizeof(u_long);
|
||||||
|
p += sizeof(u_long);
|
||||||
}
|
}
|
||||||
while (len) {
|
while (len) {
|
||||||
*p++ = 0;
|
*p++ = 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user