From f0ceb98f93c40738e4d658707f2d1d5b156a1bb4 Mon Sep 17 00:00:00 2001 From: David Schultz Date: Sat, 2 Apr 2005 18:52:44 +0000 Subject: [PATCH] Replace the current strspn() and strcspn() with significantly faster implementations inspired by the ones in DragonFly. Unlike the DragonFly versions, these have a small data cache footprint, and my tests show that they're never slower than the old code except when the charset or the span is 0 or 1 characters. This implementation is generally faster than DragonFly until either the charset or the span gets in the ballpark of 32 to 64 characters. --- lib/libc/string/strcspn.c | 77 ++++++++++++++++++++------------------- lib/libc/string/strspn.c | 73 +++++++++++++++++++++---------------- sys/libkern/strspn.c | 64 ++++++++++++++++++++------------ 3 files changed, 121 insertions(+), 93 deletions(-) diff --git a/lib/libc/string/strcspn.c b/lib/libc/string/strcspn.c index 2ec9326d9b54..3879a3b085c7 100644 --- a/lib/libc/string/strcspn.c +++ b/lib/libc/string/strcspn.c @@ -1,9 +1,6 @@ /*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Chris Torek. + * Copyright (c) 2005 David Schultz + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,18 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -34,36 +24,49 @@ * SUCH DAMAGE. */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)strcspn.c 8.1 (Berkeley) 6/4/93"; -#endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); +#include +#include #include -/* - * Span the complement of string s2. - */ -size_t -strcspn(s1, s2) - const char *s1; - const char *s2; -{ - const char *p, *spanp; - char c, sc; +#define IDX(c) ((u_char)(c) / LONG_BIT) +#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT)) +size_t +strcspn(const char *s, const char *charset) +{ /* - * Stop as soon as we find any character from s2. Note that there - * must be a NUL in s2; it suffices to stop when we find that, too. + * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to + * generate better code. Without them, gcc gets a little confused. */ - for (p = s1;;) { - c = *p++; - spanp = s2; - do { - if ((sc = *spanp++) == c) - return (p - 1 - s1); - } while (sc != 0); + const char *s1; + u_long bit; + u_long tbl[(UCHAR_MAX + 1) / LONG_BIT]; + int idx; + + if(*s == '\0') + return (0); + +#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */ + tbl[0] = 1; + tbl[3] = tbl[2] = tbl[1] = 0; +#else + for (tbl[0] = idx = 1; idx < sizeof(tbl) / sizeof(tbl[0]); idx++) + tbl[idx] = 0; +#endif + for (; *charset != '\0'; charset++) { + idx = IDX(*charset); + bit = BIT(*charset); + tbl[idx] |= bit; } - /* NOTREACHED */ + + for(s1 = s; ; s1++) { + idx = IDX(*s1); + bit = BIT(*s1); + if ((tbl[idx] & bit) != 0) + break; + } + return (s1 - s); } diff --git a/lib/libc/string/strspn.c b/lib/libc/string/strspn.c index 5612ba045457..5dbac0a67857 100644 --- a/lib/libc/string/strspn.c +++ b/lib/libc/string/strspn.c @@ -1,6 +1,6 @@ -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. +/*- + * Copyright (c) 2005 David Schultz + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,18 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -31,32 +24,48 @@ * SUCH DAMAGE. */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)strspn.c 8.1 (Berkeley) 6/4/93"; -#endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); +#include +#include #include -/* - * Span the string s2 (skip characters that are in s2). - */ -size_t -strspn(s1, s2) - const char *s1; - const char *s2; -{ - const char *p = s1, *spanp; - char c, sc; +#define IDX(c) ((u_char)(c) / LONG_BIT) +#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT)) +size_t +strspn(const char *s, const char *charset) +{ /* - * Skip any characters in s2, excluding the terminating \0. + * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to + * generate better code. Without them, gcc gets a little confused. */ -cont: - c = *p++; - for (spanp = s2; (sc = *spanp++) != 0;) - if (sc == c) - goto cont; - return (p - 1 - s1); + const char *s1; + u_long bit; + u_long tbl[(UCHAR_MAX + 1) / LONG_BIT]; + int idx; + + if(*s == '\0') + return (0); + +#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */ + tbl[3] = tbl[2] = tbl[1] = tbl[0] = 0; +#else + for (idx = 0; idx < sizeof(tbl) / sizeof(tbl[0]); idx++) + tbl[idx] = 0; +#endif + for (; *charset != '\0'; charset++) { + idx = IDX(*charset); + bit = BIT(*charset); + tbl[idx] |= bit; + } + + for(s1 = s; ; s1++) { + idx = IDX(*s1); + bit = BIT(*s1); + if ((tbl[idx] & bit) == 0) + break; + } + return (s1 - s); } diff --git a/sys/libkern/strspn.c b/sys/libkern/strspn.c index 6bcdeca22552..18327b7bd4b7 100644 --- a/sys/libkern/strspn.c +++ b/sys/libkern/strspn.c @@ -1,6 +1,6 @@ -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. +/*- + * Copyright (c) 2005 David Schultz + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,14 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -31,25 +28,44 @@ __FBSDID("$FreeBSD$"); #include +#include +#include + +#define IDX(c) ((u_char)(c) / LONG_BIT) +#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT)) -/* - * Span the string s2 (skip characters that are in s2). - */ size_t -strspn(s1, s2) - const char *s1; - const char *s2; +strspn(const char *s, const char *charset) { - const char *p = s1, *spanp; - char c, sc; - /* - * Skip any characters in s2, excluding the terminating \0. + * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to + * generate better code. Without them, gcc gets a little confused. */ -cont: - c = *p++; - for (spanp = s2; (sc = *spanp++) != 0;) - if (sc == c) - goto cont; - return (p - 1 - s1); + const char *s1; + u_long bit; + u_long tbl[(UCHAR_MAX + 1) / LONG_BIT]; + int idx; + + if(*s == '\0') + return (0); + +#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */ + tbl[3] = tbl[2] = tbl[1] = tbl[0] = 0; +#else + for (idx = 0; idx < sizeof(tbl) / sizeof(tbl[0]); idx++) + tbl[idx] = 0; +#endif + for (; *charset != '\0'; charset++) { + idx = IDX(*charset); + bit = BIT(*charset); + tbl[idx] |= bit; + } + + for(s1 = s; ; s1++) { + idx = IDX(*s1); + bit = BIT(*s1); + if ((tbl[idx] & bit) == 0) + break; + } + return (s1 - s); }