Replace the current strspn() and strcspn() with significantly faster
implementations inspired by the ones in DragonFly BSD. Unlike the DragonFly versions, these have a small data cache footprint, and my tests show that they are never slower than the old code except when the charset or the span is only 0 or 1 characters long. This implementation is generally faster than DragonFly's until either the charset or the span reaches roughly 32 to 64 characters.
This commit is contained in:
parent
02f22e6e2c
commit
87aa297030
@ -1,9 +1,6 @@
|
||||
/*-
|
||||
* Copyright (c) 1990, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Chris Torek.
|
||||
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -13,18 +10,11 @@
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
@ -34,36 +24,49 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)strcspn.c 8.1 (Berkeley) 6/4/93";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Span the complement of string s2.
|
||||
*/
|
||||
size_t
|
||||
strcspn(s1, s2)
|
||||
const char *s1;
|
||||
const char *s2;
|
||||
{
|
||||
const char *p, *spanp;
|
||||
char c, sc;
|
||||
#define IDX(c) ((u_char)(c) / LONG_BIT)
|
||||
#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT))
|
||||
|
||||
size_t
|
||||
strcspn(const char *s, const char *charset)
|
||||
{
|
||||
/*
|
||||
* Stop as soon as we find any character from s2. Note that there
|
||||
* must be a NUL in s2; it suffices to stop when we find that, too.
|
||||
* NB: idx and bit are temporaries whose use causes gcc 3.4.2 to
|
||||
* generate better code. Without them, gcc gets a little confused.
|
||||
*/
|
||||
for (p = s1;;) {
|
||||
c = *p++;
|
||||
spanp = s2;
|
||||
do {
|
||||
if ((sc = *spanp++) == c)
|
||||
return (p - 1 - s1);
|
||||
} while (sc != 0);
|
||||
const char *s1;
|
||||
u_long bit;
|
||||
u_long tbl[(UCHAR_MAX + 1) / LONG_BIT];
|
||||
int idx;
|
||||
|
||||
if(*s == '\0')
|
||||
return (0);
|
||||
|
||||
#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */
|
||||
tbl[0] = 1;
|
||||
tbl[3] = tbl[2] = tbl[1] = 0;
|
||||
#else
|
||||
for (tbl[0] = idx = 1; idx < sizeof(tbl) / sizeof(tbl[0]); idx++)
|
||||
tbl[idx] = 0;
|
||||
#endif
|
||||
for (; *charset != '\0'; charset++) {
|
||||
idx = IDX(*charset);
|
||||
bit = BIT(*charset);
|
||||
tbl[idx] |= bit;
|
||||
}
|
||||
/* NOTREACHED */
|
||||
|
||||
for(s1 = s; ; s1++) {
|
||||
idx = IDX(*s1);
|
||||
bit = BIT(*s1);
|
||||
if ((tbl[idx] & bit) != 0)
|
||||
break;
|
||||
}
|
||||
return (s1 - s);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
/*-
|
||||
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -10,18 +10,11 @@
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
@ -31,32 +24,48 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)strspn.c 8.1 (Berkeley) 6/4/93";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Span the string s2 (skip characters that are in s2).
|
||||
*/
|
||||
size_t
|
||||
strspn(s1, s2)
|
||||
const char *s1;
|
||||
const char *s2;
|
||||
{
|
||||
const char *p = s1, *spanp;
|
||||
char c, sc;
|
||||
#define IDX(c) ((u_char)(c) / LONG_BIT)
|
||||
#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT))
|
||||
|
||||
size_t
|
||||
strspn(const char *s, const char *charset)
|
||||
{
|
||||
/*
|
||||
* Skip any characters in s2, excluding the terminating \0.
|
||||
* NB: idx and bit are temporaries whose use causes gcc 3.4.2 to
|
||||
* generate better code. Without them, gcc gets a little confused.
|
||||
*/
|
||||
cont:
|
||||
c = *p++;
|
||||
for (spanp = s2; (sc = *spanp++) != 0;)
|
||||
if (sc == c)
|
||||
goto cont;
|
||||
return (p - 1 - s1);
|
||||
const char *s1;
|
||||
u_long bit;
|
||||
u_long tbl[(UCHAR_MAX + 1) / LONG_BIT];
|
||||
int idx;
|
||||
|
||||
if(*s == '\0')
|
||||
return (0);
|
||||
|
||||
#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */
|
||||
tbl[3] = tbl[2] = tbl[1] = tbl[0] = 0;
|
||||
#else
|
||||
for (idx = 0; idx < sizeof(tbl) / sizeof(tbl[0]); idx++)
|
||||
tbl[idx] = 0;
|
||||
#endif
|
||||
for (; *charset != '\0'; charset++) {
|
||||
idx = IDX(*charset);
|
||||
bit = BIT(*charset);
|
||||
tbl[idx] |= bit;
|
||||
}
|
||||
|
||||
for(s1 = s; ; s1++) {
|
||||
idx = IDX(*s1);
|
||||
bit = BIT(*s1);
|
||||
if ((tbl[idx] & bit) == 0)
|
||||
break;
|
||||
}
|
||||
return (s1 - s);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
/*-
|
||||
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -10,14 +10,11 @@
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
@ -31,25 +28,44 @@
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define IDX(c) ((u_char)(c) / LONG_BIT)
|
||||
#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT))
|
||||
|
||||
/*
|
||||
* Span the string s2 (skip characters that are in s2).
|
||||
*/
|
||||
size_t
|
||||
strspn(s1, s2)
|
||||
const char *s1;
|
||||
const char *s2;
|
||||
strspn(const char *s, const char *charset)
|
||||
{
|
||||
const char *p = s1, *spanp;
|
||||
char c, sc;
|
||||
|
||||
/*
|
||||
* Skip any characters in s2, excluding the terminating \0.
|
||||
* NB: idx and bit are temporaries whose use causes gcc 3.4.2 to
|
||||
* generate better code. Without them, gcc gets a little confused.
|
||||
*/
|
||||
cont:
|
||||
c = *p++;
|
||||
for (spanp = s2; (sc = *spanp++) != 0;)
|
||||
if (sc == c)
|
||||
goto cont;
|
||||
return (p - 1 - s1);
|
||||
const char *s1;
|
||||
u_long bit;
|
||||
u_long tbl[(UCHAR_MAX + 1) / LONG_BIT];
|
||||
int idx;
|
||||
|
||||
if(*s == '\0')
|
||||
return (0);
|
||||
|
||||
#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */
|
||||
tbl[3] = tbl[2] = tbl[1] = tbl[0] = 0;
|
||||
#else
|
||||
for (idx = 0; idx < sizeof(tbl) / sizeof(tbl[0]); idx++)
|
||||
tbl[idx] = 0;
|
||||
#endif
|
||||
for (; *charset != '\0'; charset++) {
|
||||
idx = IDX(*charset);
|
||||
bit = BIT(*charset);
|
||||
tbl[idx] |= bit;
|
||||
}
|
||||
|
||||
for(s1 = s; ; s1++) {
|
||||
idx = IDX(*s1);
|
||||
bit = BIT(*s1);
|
||||
if ((tbl[idx] & bit) == 0)
|
||||
break;
|
||||
}
|
||||
return (s1 - s);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user