Revert "Reimplement strlen"
This reverts commit 710e45c4b8
.
It breaks for some corner cases on big endian ppc64.
Given the stage of the release process it is best to revert for now.
Reported by: jhibbits
This commit is contained in:
parent
ae71b794cb
commit
33f0540b13
@ -35,6 +35,10 @@ __FBSDID("$FreeBSD$");
|
||||
/*
|
||||
* Portable strlen() for 32-bit and 64-bit systems.
|
||||
*
|
||||
* Rationale: it is generally much more efficient to do word length
|
||||
* operations and avoid branches on modern computer systems, as
|
||||
* compared to byte-length operations with a lot of branches.
|
||||
*
|
||||
* The expression:
|
||||
*
|
||||
* ((x - 0x01....01) & ~x & 0x80....80)
|
||||
@ -42,13 +46,18 @@ __FBSDID("$FreeBSD$");
|
||||
* would evaluate to a non-zero value iff any of the bytes in the
|
||||
* original word is zero.
|
||||
*
|
||||
* On multi-issue processors, we can divide the above expression into:
|
||||
* a) (x - 0x01....01)
|
||||
* b) (~x & 0x80....80)
|
||||
* c) a & b
|
||||
*
|
||||
* Where, a) and b) can be partially computed in parallel.
|
||||
*
|
||||
* The algorithm above is found on "Hacker's Delight" by
|
||||
* Henry S. Warren, Jr.
|
||||
*
|
||||
* Note: this leaves performance on the table and each architecture
|
||||
* would be best served with a tailor made routine instead.
|
||||
*/
|
||||
|
||||
/* Magic numbers for the algorithm */
|
||||
#if LONG_BIT == 32
|
||||
static const unsigned long mask01 = 0x01010101;
|
||||
static const unsigned long mask80 = 0x80808080;
|
||||
@ -61,45 +70,62 @@ static const unsigned long mask80 = 0x8080808080808080;
|
||||
|
||||
#define LONGPTR_MASK (sizeof(long) - 1)
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
#define FINDZERO __builtin_ctzl
|
||||
#else
|
||||
#define FINDZERO __builtin_clzl
|
||||
#endif
|
||||
/*
|
||||
* Helper macro to return string length if we caught the zero
|
||||
* byte.
|
||||
*/
|
||||
#define testbyte(x) \
|
||||
do { \
|
||||
if (p[x] == '\0') \
|
||||
return (p - str + x); \
|
||||
} while (0)
|
||||
|
||||
size_t
|
||||
strlen(const char *str)
|
||||
{
|
||||
const char *p;
|
||||
const unsigned long *lp;
|
||||
unsigned long mask;
|
||||
long va, vb;
|
||||
long val;
|
||||
|
||||
lp = (unsigned long *) (uintptr_t) str;
|
||||
if ((uintptr_t)lp & LONGPTR_MASK) {
|
||||
lp = (__typeof(lp)) ((uintptr_t)lp & ~LONGPTR_MASK);
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
mask = ~(~0UL << (((uintptr_t)str & LONGPTR_MASK) << 3));
|
||||
#else
|
||||
mask = ~(~0UL >> (((uintptr_t)str & LONGPTR_MASK) << 3));
|
||||
#endif
|
||||
val = *lp | mask;
|
||||
va = (val - mask01);
|
||||
vb = ((~val) & mask80);
|
||||
if (va & vb) {
|
||||
return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
|
||||
}
|
||||
lp++;
|
||||
}
|
||||
/*
|
||||
* Before trying the hard (unaligned byte-by-byte access) way
|
||||
* to figure out whether there is a nul character, try to see
|
||||
* if there is a nul character is within this accessible word
|
||||
* first.
|
||||
*
|
||||
* p and (p & ~LONGPTR_MASK) must be equally accessible since
|
||||
* they always fall in the same memory page, as long as page
|
||||
* boundaries is integral multiple of word size.
|
||||
*/
|
||||
lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
|
||||
va = (*lp - mask01);
|
||||
vb = ((~*lp) & mask80);
|
||||
lp++;
|
||||
if (va & vb)
|
||||
/* Check if we have \0 in the first part */
|
||||
for (p = str; p < (const char *)lp; p++)
|
||||
if (*p == '\0')
|
||||
return (p - str);
|
||||
|
||||
/* Scan the rest of the string using word sized operation */
|
||||
for (; ; lp++) {
|
||||
va = (*lp - mask01);
|
||||
vb = ((~*lp) & mask80);
|
||||
if (va & vb) {
|
||||
return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
|
||||
p = (const char *)(lp);
|
||||
testbyte(0);
|
||||
testbyte(1);
|
||||
testbyte(2);
|
||||
testbyte(3);
|
||||
#if (LONG_BIT >= 64)
|
||||
testbyte(4);
|
||||
testbyte(5);
|
||||
testbyte(6);
|
||||
testbyte(7);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
/* NOTREACHED */
|
||||
return (0);
|
||||
}
|
||||
|
@ -34,6 +34,10 @@ __FBSDID("$FreeBSD$");
|
||||
/*
|
||||
* Portable strlen() for 32-bit and 64-bit systems.
|
||||
*
|
||||
* Rationale: it is generally much more efficient to do word length
|
||||
* operations and avoid branches on modern computer systems, as
|
||||
* compared to byte-length operations with a lot of branches.
|
||||
*
|
||||
* The expression:
|
||||
*
|
||||
* ((x - 0x01....01) & ~x & 0x80....80)
|
||||
@ -41,10 +45,18 @@ __FBSDID("$FreeBSD$");
|
||||
* would evaluate to a non-zero value iff any of the bytes in the
|
||||
* original word is zero.
|
||||
*
|
||||
* On multi-issue processors, we can divide the above expression into:
|
||||
* a) (x - 0x01....01)
|
||||
* b) (~x & 0x80....80)
|
||||
* c) a & b
|
||||
*
|
||||
* Where, a) and b) can be partially computed in parallel.
|
||||
*
|
||||
* The algorithm above is found on "Hacker's Delight" by
|
||||
* Henry S. Warren, Jr.
|
||||
*/
|
||||
|
||||
/* Magic numbers for the algorithm */
|
||||
#if LONG_BIT == 32
|
||||
static const unsigned long mask01 = 0x01010101;
|
||||
static const unsigned long mask80 = 0x80808080;
|
||||
@ -57,45 +69,62 @@ static const unsigned long mask80 = 0x8080808080808080;
|
||||
|
||||
#define LONGPTR_MASK (sizeof(long) - 1)
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
#define FINDZERO __builtin_ctzl
|
||||
#else
|
||||
#define FINDZERO __builtin_clzl
|
||||
#endif
|
||||
/*
|
||||
* Helper macro to return string length if we caught the zero
|
||||
* byte.
|
||||
*/
|
||||
#define testbyte(x) \
|
||||
do { \
|
||||
if (p[x] == '\0') \
|
||||
return (p - str + x); \
|
||||
} while (0)
|
||||
|
||||
size_t
|
||||
(strlen)(const char *str)
|
||||
{
|
||||
const char *p;
|
||||
const unsigned long *lp;
|
||||
unsigned long mask;
|
||||
long va, vb;
|
||||
long val;
|
||||
|
||||
lp = (unsigned long *) (uintptr_t) str;
|
||||
if ((uintptr_t)lp & LONGPTR_MASK) {
|
||||
lp = (__typeof(lp)) ((uintptr_t)lp & ~LONGPTR_MASK);
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
mask = ~(~0UL << (((uintptr_t)str & LONGPTR_MASK) << 3));
|
||||
#else
|
||||
mask = ~(~0UL >> (((uintptr_t)str & LONGPTR_MASK) << 3));
|
||||
#endif
|
||||
val = *lp | mask;
|
||||
va = (val - mask01);
|
||||
vb = ((~val) & mask80);
|
||||
if (va & vb) {
|
||||
return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
|
||||
}
|
||||
lp++;
|
||||
}
|
||||
/*
|
||||
* Before trying the hard (unaligned byte-by-byte access) way
|
||||
* to figure out whether there is a nul character, try to see
|
||||
* if there is a nul character is within this accessible word
|
||||
* first.
|
||||
*
|
||||
* p and (p & ~LONGPTR_MASK) must be equally accessible since
|
||||
* they always fall in the same memory page, as long as page
|
||||
* boundaries is integral multiple of word size.
|
||||
*/
|
||||
lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
|
||||
va = (*lp - mask01);
|
||||
vb = ((~*lp) & mask80);
|
||||
lp++;
|
||||
if (va & vb)
|
||||
/* Check if we have \0 in the first part */
|
||||
for (p = str; p < (const char *)lp; p++)
|
||||
if (*p == '\0')
|
||||
return (p - str);
|
||||
|
||||
/* Scan the rest of the string using word sized operation */
|
||||
for (; ; lp++) {
|
||||
va = (*lp - mask01);
|
||||
vb = ((~*lp) & mask80);
|
||||
if (va & vb) {
|
||||
return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
|
||||
p = (const char *)(lp);
|
||||
testbyte(0);
|
||||
testbyte(1);
|
||||
testbyte(2);
|
||||
testbyte(3);
|
||||
#if (LONG_BIT >= 64)
|
||||
testbyte(4);
|
||||
testbyte(5);
|
||||
testbyte(6);
|
||||
testbyte(7);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
/* NOTREACHED */
|
||||
return (0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user