libc: Add strverscmp(3) and versionsort(3)

Add a strverscmp(3) function to libc, a GNU extension I implemented by
reading its glibc manual page. It orders strings following a much more
natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to
"ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering).

Also add versionsort(3) for use as scandir(3)'s compar argument.

Update manual page for scandir(3) and add one for strverscmp(3).

Reviewed by:	pstef, gbe, kib
MFC after:	1 week
Differential Revision: https://reviews.freebsd.org/D35807
This commit is contained in:
Aymeric Wibo 2022-08-25 02:20:13 +03:00 committed by Konstantin Belousov
parent 465988e9fa
commit 05c9a0158f
12 changed files with 278 additions and 7 deletions

View File

@ -108,6 +108,7 @@ int alphasort(const struct dirent **, const struct dirent **);
int dirfd(DIR *);
#endif
#if __BSD_VISIBLE
int versionsort(const struct dirent **, const struct dirent **);
DIR *__opendir2(const char *, int);
int fdclosedir(DIR *);
ssize_t getdents(int, char *, size_t);

View File

@ -81,6 +81,7 @@ char *strcat(char * __restrict, const char * __restrict);
char *strchr(const char *, int) __pure;
#if __BSD_VISIBLE
char *strchrnul(const char*, int) __pure;
int strverscmp(const char *, const char *) __pure;
#endif
int strcmp(const char *, const char *) __pure;
int strcoll(const char *, const char *);

View File

@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \
MLINKS+=recv.2 recvmmsg.2
MLINKS+=scandir.3 alphasort.3 \
scandir.3 scandirat.3 \
scandir.3 scandir_b.3
scandir.3 scandir_b.3 \
scandir.3 versionsort.3
MLINKS+=sem_open.3 sem_close.3 \
sem_open.3 sem_unlink.3
MLINKS+=sem_wait.3 sem_trywait.3

View File

@ -443,6 +443,7 @@ FBSD_1.7 {
sched_getaffinity;
sched_setaffinity;
sched_getcpu;
versionsort;
__cpuset_alloc;
__cpuset_free;
};

View File

@ -35,7 +35,8 @@
.Nm scandir ,
.Nm scandirat ,
.Nm scandir_b ,
.Nm alphasort
.Nm alphasort ,
.Nm versionsort
.Nd scan a directory
.Sh LIBRARY
.Lb libc
@ -65,6 +66,8 @@
.Fc
.Ft int
.Fn alphasort "const struct dirent **d1" "const struct dirent **d2"
.Ft int
.Fn versionsort "const struct dirent **d1" "const struct dirent **d2"
.Sh DESCRIPTION
The
.Fn scandir
@ -106,6 +109,13 @@ is a routine which can be used for the
argument to sort the array alphabetically using
.Xr strcoll 3 .
.Pp
The
.Fn versionsort
function is a routine which can be used for the
.Fa compar
argument to sort the array naturally using
.Xr strverscmp 3 .
.Pp
The memory allocated for the array can be deallocated with
.Xr free 3 ,
by freeing each pointer in the array and then the array itself.
@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures.
.Xr malloc 3 ,
.Xr qsort 3 ,
.Xr strcoll 3 ,
.Xr strverscmp 3 ,
.Xr dir 5
.Sh STANDARDS
The
.Fn versionsort
function is a GNU extension and conforms to no standard.
.Sh HISTORY
The
.Fn scandir
@ -171,5 +186,8 @@ functions appeared in
.Bx 4.2 .
The
.Fn scandirat
function was added in
and
.Fn
versionsort
functions were added in
.Fx 14.0 .

View File

@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2)
return (strcoll((*d1)->d_name, (*d2)->d_name));
}
int
versionsort(const struct dirent **d1, const struct dirent **d2)
{
return (strverscmp((*d1)->d_name, (*d2)->d_name));
}
static int
alphasort_thunk(void *thunk, const void *p1, const void *p2)
{

View File

@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \
strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \
strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \
strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
strxfrm.c swab.c \
strverscmp.c strxfrm.c swab.c \
timingsafe_bcmp.c \
timingsafe_memcmp.c \
wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \
@ -46,7 +46,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \
strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \
string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \
strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \
strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \
timingsafe_bcmp.3 \
wcscoll.3 wcstok.3 \
wcswidth.3 wcsxfrm.3 wmemchr.3

View File

@ -116,6 +116,7 @@ FBSD_1.6 {
FBSD_1.7 {
mempcpy;
strverscmp;
wmempcpy;
};

View File

@ -0,0 +1,56 @@
.\" SPDX-License-Identifier: BSD-2-Clause
.\" Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
.Dd July 11, 2022
.Dt STRVERSCMP 3
.Os
.Sh NAME
.Nm strverscmp
.Nd compare strings according to natural order
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In string.h
.Ft int
.Fn strverscmp "const char *s1" "const char *s2"
.Sh DESCRIPTION
The
.Fn strverscmp
function
compares the null-terminated strings
.Fa s1
and
.Fa s2
according to their natural order
and returns an integer greater than, equal to, or less than 0,
depending on whether
.Fa s1
is greater than, equal to, or less than
.Fa s2 .
.Pp
More specifically, this natural order is found by iterating over both
strings until a difference is found.
If the difference is between non-decimal characters,
.Fn strverscmp
acts like
.Xr strcmp 3
(thus, the ordering would be "a", "b", "train").
If a decimal digit is found, the whole number is read and compared
(thus, the ordering would be "9", "10", "420" which is different to lexicographic order,
what
.Xr strcmp 3
would have done).
Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point),
and numbers with more leading zeroes are placed before numbers with fewer leading zeroes
(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10").
.Sh SEE ALSO
.Xr strcmp 3 ,
.Xr versionsort 3
.Sh STANDARDS
The
.Fn strverscmp
function is a GNU extension and conforms to no standard.
.Sh HISTORY
The
.Fn strverscmp
function was added in
.Fx 14.0 .

View File

@ -0,0 +1,91 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
*/
#include <ctype.h>
#include <stddef.h>
int
strverscmp(const char *s1, const char *s2)
{
size_t digit_count_1, digit_count_2;
size_t zeros_count_1, zeros_count_2;
const unsigned char *num_1, *num_2;
const unsigned char *u1 = __DECONST(const unsigned char *, s1);
const unsigned char *u2 = __DECONST(const unsigned char *, s2);
/*
* If pointers are the same, no need to go through to process of
* comparing them.
*/
if (s1 == s2)
return (0);
while (*u1 != '\0' && *u2 != '\0') {
/* If either character is not a digit, act like strcmp(3). */
if (!isdigit(*u1) || !isdigit(*u2)) {
if (*u1 != *u2)
return (*u1 - *u2);
u1++;
u2++;
continue;
}
if (*u1 == '0' || *u2 == '0') {
/*
* Treat leading zeros as if they were the fractional
* part of a number, i.e. as if they had a decimal point
* in front. First, count the leading zeros (more zeros
* == smaller number).
*/
zeros_count_1 = 0;
zeros_count_2 = 0;
for (; *u1 == '0'; u1++)
zeros_count_1++;
for (; *u2 == '0'; u2++)
zeros_count_2++;
if (zeros_count_1 != zeros_count_2)
return (zeros_count_2 - zeros_count_1);
/* Handle the case where 0 < 09. */
if (!isdigit(*u1) && isdigit(*u2))
return (1);
if (!isdigit(*u2) && isdigit(*u1))
return (-1);
} else {
/*
* No leading zeros; we're simply comparing two numbers.
* It is necessary to first count how many digits there
* are before going back to compare each digit, so that
* e.g. 7 is not considered larger than 60.
*/
num_1 = u1;
num_2 = u2;
/* Count digits (more digits == larger number). */
for (; isdigit(*u1); u1++)
;
for (; isdigit(*u2); u2++)
;
digit_count_1 = u1 - num_1;
digit_count_2 = u2 - num_2;
if (digit_count_1 != digit_count_2)
return (digit_count_1 - digit_count_2);
/*
* If there are the same number of digits, go back to
* the start of the number.
*/
u1 = num_1;
u2 = num_2;
}
/* Compare each digit until there are none left. */
for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) {
if (*u1 != *u2)
return (*u1 - *u2);
}
}
return (*u1 - *u2);
}

View File

@ -4,10 +4,11 @@ ATF_TESTS_C+= memcmp_test
ATF_TESTS_C+= memset_s_test
ATF_TESTS_C+= stpncpy_test
ATF_TESTS_C+= strerror2_test
ATF_TESTS_C+= wcscasecmp_test
ATF_TESTS_C+= wcsnlen_test
ATF_TESTS_C+= strverscmp_test
ATF_TESTS_C+= strxfrm_test
ATF_TESTS_C+= wcscasecmp_test
ATF_TESTS_C+= wcscoll_test
ATF_TESTS_C+= wcsnlen_test
# TODO: popcount, stresep

View File

@ -0,0 +1,93 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
*/
#include <atf-c.h>
#include <string.h>
static void
check_all(size_t len, const char *ordered[len])
{
const char *a, *b;
for (size_t i = 0; i < len; i++) {
for (size_t j = 0; j < len; j++) {
a = ordered[i];
b = ordered[j];
if (i == j)
ATF_CHECK_MSG(
strverscmp(a, b) == 0,
"strverscmp(\"%s\", \"%s\") == 0",
a, b
);
else if (i < j)
ATF_CHECK_MSG(
strverscmp(a, b) < 0,
"strverscmp(\"%s\", \"%s\") < 0",
a, b
);
else if (i > j)
ATF_CHECK_MSG(
strverscmp(a, b) > 0,
"strverscmp(\"%s\", \"%s\") > 0",
a, b
);
}
}
}
#define CHECK_ALL(...) do { \
const char *ordered[] = { __VA_ARGS__ }; \
check_all(sizeof(ordered) / sizeof(*ordered), ordered); \
} while (0)
ATF_TC_WITHOUT_HEAD(strcmp_functionality);
ATF_TC_BODY(strcmp_functionality, tc)
{
CHECK_ALL("", "a", "b");
}
/* from Linux man page strverscmp(3) */
ATF_TC_WITHOUT_HEAD(vers_ordering);
ATF_TC_BODY(vers_ordering, tc)
{
CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10");
}
ATF_TC_WITHOUT_HEAD(natural_ordering);
ATF_TC_BODY(natural_ordering, tc)
{
CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20");
}
/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */
ATF_TC_WITHOUT_HEAD(glibc_bug_9913);
ATF_TC_BODY(glibc_bug_9913, tc)
{
CHECK_ALL(
"B0075022800016.gbp.corp.com",
"B007502280067.gbp.corp.com",
"B007502357019.GBP.CORP.COM"
);
}
ATF_TC_WITHOUT_HEAD(semver_ordering);
ATF_TC_BODY(semver_ordering, tc)
{
CHECK_ALL("2.6.20", "2.6.21");
}
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, strcmp_functionality);
ATF_TP_ADD_TC(tp, vers_ordering);
ATF_TP_ADD_TC(tp, natural_ordering);
ATF_TP_ADD_TC(tp, glibc_bug_9913);
ATF_TP_ADD_TC(tp, semver_ordering);
return (atf_no_error());
}