libc: Add strverscmp(3) and versionsort(3)
Add a strverscmp(3) function to libc, a GNU extension I implemented by reading its glibc manual page. It orders strings following a much more natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to "ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering). Also add versionsort(3) for use as scandir(3)'s compar argument. Update manual page for scandir(3) and add one for strverscmp(3). Reviewed by: pstef, gbe, kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35807
This commit is contained in:
parent
465988e9fa
commit
05c9a0158f
@ -108,6 +108,7 @@ int alphasort(const struct dirent **, const struct dirent **);
|
||||
int dirfd(DIR *);
|
||||
#endif
|
||||
#if __BSD_VISIBLE
|
||||
int versionsort(const struct dirent **, const struct dirent **);
|
||||
DIR *__opendir2(const char *, int);
|
||||
int fdclosedir(DIR *);
|
||||
ssize_t getdents(int, char *, size_t);
|
||||
|
@ -81,6 +81,7 @@ char *strcat(char * __restrict, const char * __restrict);
|
||||
char *strchr(const char *, int) __pure;
|
||||
#if __BSD_VISIBLE
|
||||
char *strchrnul(const char*, int) __pure;
|
||||
int strverscmp(const char *, const char *) __pure;
|
||||
#endif
|
||||
int strcmp(const char *, const char *) __pure;
|
||||
int strcoll(const char *, const char *);
|
||||
|
@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \
|
||||
MLINKS+=recv.2 recvmmsg.2
|
||||
MLINKS+=scandir.3 alphasort.3 \
|
||||
scandir.3 scandirat.3 \
|
||||
scandir.3 scandir_b.3
|
||||
scandir.3 scandir_b.3 \
|
||||
scandir.3 versionsort.3
|
||||
MLINKS+=sem_open.3 sem_close.3 \
|
||||
sem_open.3 sem_unlink.3
|
||||
MLINKS+=sem_wait.3 sem_trywait.3
|
||||
|
@ -443,6 +443,7 @@ FBSD_1.7 {
|
||||
sched_getaffinity;
|
||||
sched_setaffinity;
|
||||
sched_getcpu;
|
||||
versionsort;
|
||||
__cpuset_alloc;
|
||||
__cpuset_free;
|
||||
};
|
||||
|
@ -35,7 +35,8 @@
|
||||
.Nm scandir ,
|
||||
.Nm scandirat ,
|
||||
.Nm scandir_b ,
|
||||
.Nm alphasort
|
||||
.Nm alphasort ,
|
||||
.Nm versionsort
|
||||
.Nd scan a directory
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
@ -65,6 +66,8 @@
|
||||
.Fc
|
||||
.Ft int
|
||||
.Fn alphasort "const struct dirent **d1" "const struct dirent **d2"
|
||||
.Ft int
|
||||
.Fn versionsort "const struct dirent **d1" "const struct dirent **d2"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn scandir
|
||||
@ -106,6 +109,13 @@ is a routine which can be used for the
|
||||
argument to sort the array alphabetically using
|
||||
.Xr strcoll 3 .
|
||||
.Pp
|
||||
The
|
||||
.Fn versionsort
|
||||
function is a routine which can be used for the
|
||||
.Fa compar
|
||||
argument to sort the array naturally using
|
||||
.Xr strverscmp 3 .
|
||||
.Pp
|
||||
The memory allocated for the array can be deallocated with
|
||||
.Xr free 3 ,
|
||||
by freeing each pointer in the array and then the array itself.
|
||||
@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures.
|
||||
.Xr malloc 3 ,
|
||||
.Xr qsort 3 ,
|
||||
.Xr strcoll 3 ,
|
||||
.Xr strverscmp 3 ,
|
||||
.Xr dir 5
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn versionsort
|
||||
function is a GNU extension and conforms to no standard.
|
||||
.Sh HISTORY
|
||||
The
|
||||
.Fn scandir
|
||||
@ -171,5 +186,8 @@ functions appeared in
|
||||
.Bx 4.2 .
|
||||
The
|
||||
.Fn scandirat
|
||||
function was added in
|
||||
and
|
||||
.Fn versionsort
|
||||
functions were added in
|
||||
.Fx 14.0 .
|
||||
|
@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2)
|
||||
return (strcoll((*d1)->d_name, (*d2)->d_name));
|
||||
}
|
||||
|
||||
/*
 * Comparison routine for scandir(3): orders two directory entries by
 * passing their d_name fields to strverscmp(3) and returning its result.
 */
int
versionsort(const struct dirent **d1, const struct dirent **d2)
{
	const struct dirent *lhs = *d1;
	const struct dirent *rhs = *d2;

	return (strverscmp(lhs->d_name, rhs->d_name));
}
|
||||
|
||||
static int
|
||||
alphasort_thunk(void *thunk, const void *p1, const void *p2)
|
||||
{
|
||||
|
@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \
|
||||
strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \
|
||||
strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \
|
||||
strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
|
||||
strxfrm.c swab.c \
|
||||
strverscmp.c strxfrm.c swab.c \
|
||||
timingsafe_bcmp.c \
|
||||
timingsafe_memcmp.c \
|
||||
wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \
|
||||
@ -46,7 +46,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
|
||||
memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \
|
||||
strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \
|
||||
string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \
|
||||
strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \
|
||||
strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \
|
||||
timingsafe_bcmp.3 \
|
||||
wcscoll.3 wcstok.3 \
|
||||
wcswidth.3 wcsxfrm.3 wmemchr.3
|
||||
|
@ -116,6 +116,7 @@ FBSD_1.6 {
|
||||
|
||||
FBSD_1.7 {
|
||||
mempcpy;
|
||||
strverscmp;
|
||||
wmempcpy;
|
||||
};
|
||||
|
||||
|
56
lib/libc/string/strverscmp.3
Normal file
56
lib/libc/string/strverscmp.3
Normal file
@ -0,0 +1,56 @@
|
||||
.\" SPDX-License-Identifier: BSD-2-Clause
|
||||
.\" Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
|
||||
.Dd July 11, 2022
|
||||
.Dt STRVERSCMP 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm strverscmp
|
||||
.Nd compare strings according to natural order
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
.Sh SYNOPSIS
|
||||
.In string.h
|
||||
.Ft int
|
||||
.Fn strverscmp "const char *s1" "const char *s2"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn strverscmp
|
||||
function
|
||||
compares the null-terminated strings
|
||||
.Fa s1
|
||||
and
|
||||
.Fa s2
|
||||
according to their natural order
|
||||
and returns an integer greater than, equal to, or less than 0,
|
||||
depending on whether
|
||||
.Fa s1
|
||||
is greater than, equal to, or less than
|
||||
.Fa s2 .
|
||||
.Pp
|
||||
More specifically, this natural order is found by iterating over both
|
||||
strings until a difference is found.
|
||||
If the difference is between non-decimal characters,
|
||||
.Fn strverscmp
|
||||
acts like
|
||||
.Xr strcmp 3
|
||||
(thus, the ordering would be "a", "b", "train").
|
||||
If a decimal digit is found, the whole number is read and compared
|
||||
(thus, the ordering would be "9", "10", "420", which differs from the lexicographic order that
.Xr strcmp 3
would produce).
|
||||
Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point),
|
||||
and numbers with more leading zeroes are placed before numbers with fewer leading zeroes
|
||||
(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10").
|
||||
.Sh SEE ALSO
|
||||
.Xr strcmp 3 ,
|
||||
.Xr versionsort 3
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn strverscmp
|
||||
function is a GNU extension and conforms to no standard.
|
||||
.Sh HISTORY
|
||||
The
|
||||
.Fn strverscmp
|
||||
function was added in
|
||||
.Fx 14.0 .
|
91
lib/libc/string/strverscmp.c
Normal file
91
lib/libc/string/strverscmp.c
Normal file
@ -0,0 +1,91 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
 * Compare the NUL-terminated strings s1 and s2 in "version" order.
 *
 * Runs of non-digit bytes are compared byte-wise as unsigned chars, like
 * strcmp(3).  When both strings reach a decimal digit at the same position,
 * the two digit runs are compared as numbers:
 *  - if either run starts with '0', the run with more leading zeros sorts
 *    first; with equal zero counts the remaining digits are compared
 *    left to right;
 *  - otherwise the run with fewer digits sorts first, and runs of equal
 *    length are compared left to right.
 *
 * Returns a negative value, zero, or a positive value if s1 sorts before,
 * equal to, or after s2.  Only the sign of a non-zero result is meaningful.
 */
int
strverscmp(const char *s1, const char *s2)
{
	size_t digit_count_1, digit_count_2;
	size_t zeros_count_1, zeros_count_2;
	const unsigned char *num_1, *num_2;
	const unsigned char *u1 = (const unsigned char *)s1;
	const unsigned char *u2 = (const unsigned char *)s2;

	/*
	 * If the pointers are the same, there is no need to go through the
	 * process of comparing them.
	 */
	if (s1 == s2)
		return (0);

	while (*u1 != '\0' && *u2 != '\0') {
		/* If either character is not a digit, act like strcmp(3). */
		if (!isdigit(*u1) || !isdigit(*u2)) {
			if (*u1 != *u2)
				return (*u1 - *u2);
			u1++;
			u2++;
			continue;
		}

		if (*u1 == '0' || *u2 == '0') {
			/*
			 * Treat leading zeros as if they were the fractional
			 * part of a number, i.e. as if they had a decimal point
			 * in front.  First, count the leading zeros (more zeros
			 * == smaller number).
			 */
			zeros_count_1 = 0;
			zeros_count_2 = 0;
			for (; *u1 == '0'; u1++)
				zeros_count_1++;
			for (; *u2 == '0'; u2++)
				zeros_count_2++;

			/*
			 * Compare the counts explicitly instead of returning
			 * their size_t difference, which would be narrowed to
			 * int and could lose or flip its sign.
			 */
			if (zeros_count_1 != zeros_count_2)
				return (zeros_count_1 > zeros_count_2 ? -1 : 1);

			/*
			 * Same number of zeros: a run that continues with
			 * further digits sorts before one that stops here,
			 * e.g. "09" < "0".
			 */
			if (!isdigit(*u1) && isdigit(*u2))
				return (1);
			if (!isdigit(*u2) && isdigit(*u1))
				return (-1);
		} else {
			/*
			 * No leading zeros; we're simply comparing two numbers.
			 * It is necessary to first count how many digits there
			 * are before going back to compare each digit, so that
			 * e.g. 7 is not considered larger than 60.
			 */
			num_1 = u1;
			num_2 = u2;

			/* Count digits (more digits == larger number). */
			for (; isdigit(*u1); u1++)
				;
			for (; isdigit(*u2); u2++)
				;
			digit_count_1 = (size_t)(u1 - num_1);
			digit_count_2 = (size_t)(u2 - num_2);

			/* As above: return a sign, not a narrowed difference. */
			if (digit_count_1 != digit_count_2)
				return (digit_count_1 < digit_count_2 ? -1 : 1);

			/*
			 * If there are the same number of digits, go back to
			 * the start of the number.
			 */
			u1 = num_1;
			u2 = num_2;
		}

		/* Compare each digit until there are none left. */
		for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) {
			if (*u1 != *u2)
				return (*u1 - *u2);
		}
	}

	/* At least one string has ended; the shorter one sorts first. */
	return (*u1 - *u2);
}
|
@ -4,10 +4,11 @@ ATF_TESTS_C+= memcmp_test
|
||||
ATF_TESTS_C+= memset_s_test
|
||||
ATF_TESTS_C+= stpncpy_test
|
||||
ATF_TESTS_C+= strerror2_test
|
||||
ATF_TESTS_C+= wcscasecmp_test
|
||||
ATF_TESTS_C+= wcsnlen_test
|
||||
ATF_TESTS_C+= strverscmp_test
|
||||
ATF_TESTS_C+= strxfrm_test
|
||||
ATF_TESTS_C+= wcscasecmp_test
|
||||
ATF_TESTS_C+= wcscoll_test
|
||||
ATF_TESTS_C+= wcsnlen_test
|
||||
|
||||
# TODO: popcount, stresep
|
||||
|
||||
|
93
lib/libc/tests/string/strverscmp_test.c
Normal file
93
lib/libc/tests/string/strverscmp_test.c
Normal file
@ -0,0 +1,93 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
|
||||
*/
|
||||
|
||||
#include <atf-c.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Compare every pair of strings in `ordered` (len entries) with
 * strverscmp(3) and record an ATF check failure whenever the sign of the
 * result disagrees with the relative positions of the two strings in the
 * array: an earlier entry must compare less than a later one, and an entry
 * must compare equal to itself.
 */
static void
check_all(size_t len, const char *ordered[len])
{
	const char *lhs, *rhs;
	size_t row, col;

	for (row = 0; row < len; row++) {
		for (col = 0; col < len; col++) {
			lhs = ordered[row];
			rhs = ordered[col];

			if (row < col) {
				ATF_CHECK_MSG(strverscmp(lhs, rhs) < 0,
				    "strverscmp(\"%s\", \"%s\") < 0",
				    lhs, rhs);
			} else if (row > col) {
				ATF_CHECK_MSG(strverscmp(lhs, rhs) > 0,
				    "strverscmp(\"%s\", \"%s\") > 0",
				    lhs, rhs);
			} else {
				ATF_CHECK_MSG(strverscmp(lhs, rhs) == 0,
				    "strverscmp(\"%s\", \"%s\") == 0",
				    lhs, rhs);
			}
		}
	}
}
|
||||
|
||||
/*
 * Collect the string arguments into a local array and pass it, together
 * with its element count, to check_all().
 */
#define	CHECK_ALL(...) do {						\
	const char *ordered[] = { __VA_ARGS__ };			\
	check_all(sizeof(ordered) / sizeof(ordered[0]), ordered);	\
} while (0)
|
||||
|
||||
/* Strings without digits, including the empty string. */
ATF_TC_WITHOUT_HEAD(strcmp_functionality);
ATF_TC_BODY(strcmp_functionality, tc)
{
	CHECK_ALL(
	    "",
	    "a",
	    "b"
	);
}
|
||||
|
||||
/* Ordering example taken from the Linux strverscmp(3) manual page. */
ATF_TC_WITHOUT_HEAD(vers_ordering);
ATF_TC_BODY(vers_ordering, tc)
{
	CHECK_ALL(
	    "000", "00", "01", "010", "09",
	    "0", "1", "9", "10"
	);
}
|
||||
|
||||
/* A common prefix followed by numbers of one and two digits. */
ATF_TC_WITHOUT_HEAD(natural_ordering);
ATF_TC_BODY(natural_ordering, tc)
{
	CHECK_ALL(
	    "jan1", "jan2", "jan9",
	    "jan10", "jan11", "jan19", "jan20"
	);
}
|
||||
|
||||
/*
 * Host names from the glibc bug report at
 * https://sourceware.org/bugzilla/show_bug.cgi?id=9913
 */
ATF_TC_WITHOUT_HEAD(glibc_bug_9913);
ATF_TC_BODY(glibc_bug_9913, tc)
{
	CHECK_ALL("B0075022800016.gbp.corp.com",
	    "B007502280067.gbp.corp.com",
	    "B007502357019.GBP.CORP.COM");
}
|
||||
|
||||
/* Dotted version strings that differ only in the last component. */
ATF_TC_WITHOUT_HEAD(semver_ordering);
ATF_TC_BODY(semver_ordering, tc)
{
	CHECK_ALL(
	    "2.6.20",
	    "2.6.21"
	);
}
|
||||
|
||||
ATF_TP_ADD_TCS(tp)
|
||||
{
|
||||
ATF_TP_ADD_TC(tp, strcmp_functionality);
|
||||
ATF_TP_ADD_TC(tp, vers_ordering);
|
||||
ATF_TP_ADD_TC(tp, natural_ordering);
|
||||
ATF_TP_ADD_TC(tp, glibc_bug_9913);
|
||||
ATF_TP_ADD_TC(tp, semver_ordering);
|
||||
|
||||
return (atf_no_error());
|
||||
}
|
Loading…
Reference in New Issue
Block a user