From 05c9a0158f6837bb3a3781e4ed75f66115f6415a Mon Sep 17 00:00:00 2001 From: Aymeric Wibo Date: Thu, 25 Aug 2022 02:20:13 +0300 Subject: [PATCH] libc: Add strverscmp(3) and versionsort(3) Add a strverscmp(3) function to libc, a GNU extension I implemented by reading its glibc manual page. It orders strings following a much more natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to "ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering). Also add versionsort(3) for use as scandir(3)'s compar argument. Update manual page for scandir(3) and add one for strverscmp(3). Reviewed by: pstef, gbe, kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35807 --- include/dirent.h | 1 + include/string.h | 1 + lib/libc/gen/Makefile.inc | 3 +- lib/libc/gen/Symbol.map | 1 + lib/libc/gen/scandir.3 | 22 +++++- lib/libc/gen/scandir.c | 7 ++ lib/libc/string/Makefile.inc | 4 +- lib/libc/string/Symbol.map | 1 + lib/libc/string/strverscmp.3 | 56 +++++++++++++++ lib/libc/string/strverscmp.c | 91 ++++++++++++++++++++++++ lib/libc/tests/string/Makefile | 5 +- lib/libc/tests/string/strverscmp_test.c | 93 +++++++++++++++++++++++++ 12 files changed, 278 insertions(+), 7 deletions(-) create mode 100644 lib/libc/string/strverscmp.3 create mode 100644 lib/libc/string/strverscmp.c create mode 100644 lib/libc/tests/string/strverscmp_test.c diff --git a/include/dirent.h b/include/dirent.h index 047206258471..751a016838c0 100644 --- a/include/dirent.h +++ b/include/dirent.h @@ -108,6 +108,7 @@ int alphasort(const struct dirent **, const struct dirent **); int dirfd(DIR *); #endif #if __BSD_VISIBLE +int versionsort(const struct dirent **, const struct dirent **); DIR *__opendir2(const char *, int); int fdclosedir(DIR *); ssize_t getdents(int, char *, size_t); diff --git a/include/string.h b/include/string.h index 15d4dc7e9701..dc5756830208 100644 --- a/include/string.h +++ b/include/string.h @@ -81,6 +81,7 @@ char *strcat(char * __restrict, const char * __restrict); char *strchr(const char *, int) __pure; #if __BSD_VISIBLE char *strchrnul(const char*, int) __pure; +int strverscmp(const char *, const char *) __pure; #endif int strcmp(const char *, const char *) __pure; int strcoll(const char *, const char *); diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 6d0e542a1f7b..45977b6b9005 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \ MLINKS+=recv.2 recvmmsg.2 MLINKS+=scandir.3 alphasort.3 \ scandir.3 scandirat.3 \ - scandir.3 scandir_b.3 + scandir.3 scandir_b.3 \ + scandir.3 versionsort.3 MLINKS+=sem_open.3 sem_close.3 \ sem_open.3 sem_unlink.3 MLINKS+=sem_wait.3 sem_trywait.3 diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 74676ffe2270..0a20a41d0e20 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -443,6 +443,7 @@ FBSD_1.7 { sched_getaffinity; sched_setaffinity; sched_getcpu; + versionsort; __cpuset_alloc; __cpuset_free; }; diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3 index b533b33ce7a7..07d8074ae592 100644 --- a/lib/libc/gen/scandir.3 +++ b/lib/libc/gen/scandir.3 @@ -35,7 +35,8 @@ .Nm scandir , .Nm scandirat , .Nm scandir_b , -.Nm alphasort +.Nm alphasort , +.Nm versionsort .Nd scan a directory .Sh LIBRARY .Lb libc @@ -65,6 +66,8 @@ .Fc .Ft int .Fn alphasort "const struct dirent **d1" "const struct dirent **d2" +.Ft int +.Fn versionsort "const struct dirent **d1" "const struct dirent **d2" .Sh DESCRIPTION The .Fn scandir @@ -106,6 +109,13 @@ is a routine which can be used for the argument to sort the array alphabetically using .Xr strcoll 3 . .Pp +The +.Fn versionsort +function is a routine which can be used for the +.Fa compar +argument to sort the array naturally using +.Xr strverscmp 3 . +.Pp The memory allocated for the array can be deallocated with .Xr free 3 , by freeing each pointer in the array and then the array itself. @@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures. .Xr malloc 3 , .Xr qsort 3 , .Xr strcoll 3 , +.Xr strverscmp 3 , .Xr dir 5 +.Sh STANDARDS +The +.Fn versionsort +function is a GNU extension and conforms to no standard. .Sh HISTORY The .Fn scandir @@ -171,5 +186,8 @@ functions appeared in .Bx 4.2 . The .Fn scandirat -function was added in +and +.Fn +versionsort +functions were added in .Fx 14.0 . diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c index 3a891b0ad3f2..8a260adcd2f3 100644 --- a/lib/libc/gen/scandir.c +++ b/lib/libc/gen/scandir.c @@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2) return (strcoll((*d1)->d_name, (*d2)->d_name)); } +int +versionsort(const struct dirent **d1, const struct dirent **d2) +{ + + return (strverscmp((*d1)->d_name, (*d2)->d_name)); +} + static int alphasort_thunk(void *thunk, const void *p1, const void *p2) { diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc index 1df3d40e329f..afc113eeb867 100644 --- a/lib/libc/string/Makefile.inc +++ b/lib/libc/string/Makefile.inc @@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \ strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \ strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \ strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ - strxfrm.c swab.c \ + strverscmp.c strxfrm.c swab.c \ timingsafe_bcmp.c \ timingsafe_memcmp.c \ wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \ @@ -46,7 +46,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \ memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \ strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \ string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \ - strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \ + strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \ timingsafe_bcmp.3 \ wcscoll.3 wcstok.3 \ wcswidth.3 wcsxfrm.3 wmemchr.3 diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map index ec45d7fd7ddb..4fcd194bafd8 100644 --- a/lib/libc/string/Symbol.map +++ b/lib/libc/string/Symbol.map @@ -116,6 +116,7 @@ FBSD_1.6 { FBSD_1.7 { mempcpy; + strverscmp; wmempcpy; }; diff --git a/lib/libc/string/strverscmp.3 b/lib/libc/string/strverscmp.3 new file mode 100644 index 000000000000..e4413fb96e36 --- /dev/null +++ b/lib/libc/string/strverscmp.3 @@ -0,0 +1,56 @@ +.\" SPDX-License-Identifier: BSD-2-Clause +.\" Copyright (c) 2022 Aymeric Wibo +.Dd July 11, 2022 +.Dt STRVERSCMP 3 +.Os +.Sh NAME +.Nm strverscmp +.Nd compare strings according to natural order +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In string.h +.Ft int +.Fn strverscmp "const char *s1" "const char *s2" +.Sh DESCRIPTION +The +.Fn strverscmp +function +compares the null-terminated strings +.Fa s1 +and +.Fa s2 +according to their natural order +and returns an integer greater than, equal to, or less than 0, +depending on whether +.Fa s1 +is greater than, equal to, or less than +.Fa s2 . +.Pp +More specifically, this natural order is found by iterating over both +strings until a difference is found. +If the difference is between non-decimal characters, +.Fn strverscmp +acts like +.Xr strcmp 3 +(thus, the ordering would be "a", "b", "train"). +If a decimal digit is found, the whole number is read and compared +(thus, the ordering would be "9", "10", "420" which is different to lexicographic order, +what +.Xr strcmp 3 +would have done). +Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point), +and numbers with more leading zeroes are placed before numbers with fewer leading zeroes +(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10"). +.Sh SEE ALSO +.Xr strcmp 3 , +.Xr versionsort 3 +.Sh STANDARDS +The +.Fn strverscmp +function is a GNU extension and conforms to no standard. +.Sh HISTORY +The +.Fn strverscmp +function was added in +.Fx 14.0 . diff --git a/lib/libc/string/strverscmp.c b/lib/libc/string/strverscmp.c new file mode 100644 index 000000000000..6051adb35499 --- /dev/null +++ b/lib/libc/string/strverscmp.c @@ -0,0 +1,91 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* Copyright (c) 2022 Aymeric Wibo +*/ + +#include +#include + +int +strverscmp(const char *s1, const char *s2) +{ + size_t digit_count_1, digit_count_2; + size_t zeros_count_1, zeros_count_2; + const unsigned char *num_1, *num_2; + const unsigned char *u1 = __DECONST(const unsigned char *, s1); + const unsigned char *u2 = __DECONST(const unsigned char *, s2); + + /* + * If pointers are the same, no need to go through to process of + * comparing them. + */ + if (s1 == s2) + return (0); + + while (*u1 != '\0' && *u2 != '\0') { + /* If either character is not a digit, act like strcmp(3). */ + + if (!isdigit(*u1) || !isdigit(*u2)) { + if (*u1 != *u2) + return (*u1 - *u2); + u1++; + u2++; + continue; + } + if (*u1 == '0' || *u2 == '0') { + /* + * Treat leading zeros as if they were the fractional + * part of a number, i.e. as if they had a decimal point + * in front. First, count the leading zeros (more zeros + * == smaller number). + */ + zeros_count_1 = 0; + zeros_count_2 = 0; + for (; *u1 == '0'; u1++) + zeros_count_1++; + for (; *u2 == '0'; u2++) + zeros_count_2++; + if (zeros_count_1 != zeros_count_2) + return (zeros_count_2 - zeros_count_1); + + /* Handle the case where 0 < 09. */ + if (!isdigit(*u1) && isdigit(*u2)) + return (1); + if (!isdigit(*u2) && isdigit(*u1)) + return (-1); + } else { + /* + * No leading zeros; we're simply comparing two numbers. + * It is necessary to first count how many digits there + * are before going back to compare each digit, so that + * e.g. 7 is not considered larger than 60. + */ + num_1 = u1; + num_2 = u2; + + /* Count digits (more digits == larger number). */ + for (; isdigit(*u1); u1++) + ; + for (; isdigit(*u2); u2++) + ; + digit_count_1 = u1 - num_1; + digit_count_2 = u2 - num_2; + if (digit_count_1 != digit_count_2) + return (digit_count_1 - digit_count_2); + + /* + * If there are the same number of digits, go back to + * the start of the number. + */ + u1 = num_1; + u2 = num_2; + } + + /* Compare each digit until there are none left. */ + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { + if (*u1 != *u2) + return (*u1 - *u2); + } + } + return (*u1 - *u2); +} diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile index c6a98572564d..eacf7e15c27c 100644 --- a/lib/libc/tests/string/Makefile +++ b/lib/libc/tests/string/Makefile @@ -4,10 +4,11 @@ ATF_TESTS_C+= memcmp_test ATF_TESTS_C+= memset_s_test ATF_TESTS_C+= stpncpy_test ATF_TESTS_C+= strerror2_test -ATF_TESTS_C+= wcscasecmp_test -ATF_TESTS_C+= wcsnlen_test +ATF_TESTS_C+= strverscmp_test ATF_TESTS_C+= strxfrm_test +ATF_TESTS_C+= wcscasecmp_test ATF_TESTS_C+= wcscoll_test +ATF_TESTS_C+= wcsnlen_test # TODO: popcount, stresep diff --git a/lib/libc/tests/string/strverscmp_test.c b/lib/libc/tests/string/strverscmp_test.c new file mode 100644 index 000000000000..fd6a2620cb48 --- /dev/null +++ b/lib/libc/tests/string/strverscmp_test.c @@ -0,0 +1,93 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* Copyright (c) 2022 Aymeric Wibo +*/ + +#include +#include + +static void +check_all(size_t len, const char *ordered[len]) +{ + const char *a, *b; + + for (size_t i = 0; i < len; i++) { + for (size_t j = 0; j < len; j++) { + a = ordered[i]; + b = ordered[j]; + + if (i == j) + ATF_CHECK_MSG( + strverscmp(a, b) == 0, + "strverscmp(\"%s\", \"%s\") == 0", + a, b + ); + else if (i < j) + ATF_CHECK_MSG( + strverscmp(a, b) < 0, + "strverscmp(\"%s\", \"%s\") < 0", + a, b + ); + else if (i > j) + ATF_CHECK_MSG( + strverscmp(a, b) > 0, + "strverscmp(\"%s\", \"%s\") > 0", + a, b + ); + } + } +} + +#define CHECK_ALL(...) do { \ + const char *ordered[] = { __VA_ARGS__ }; \ + check_all(sizeof(ordered) / sizeof(*ordered), ordered); \ +} while (0) + +ATF_TC_WITHOUT_HEAD(strcmp_functionality); +ATF_TC_BODY(strcmp_functionality, tc) +{ + CHECK_ALL("", "a", "b"); +} + +/* from Linux man page strverscmp(3) */ + +ATF_TC_WITHOUT_HEAD(vers_ordering); +ATF_TC_BODY(vers_ordering, tc) +{ + CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10"); +} + +ATF_TC_WITHOUT_HEAD(natural_ordering); +ATF_TC_BODY(natural_ordering, tc) +{ + CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20"); +} + +/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */ + +ATF_TC_WITHOUT_HEAD(glibc_bug_9913); +ATF_TC_BODY(glibc_bug_9913, tc) +{ + CHECK_ALL( + "B0075022800016.gbp.corp.com", + "B007502280067.gbp.corp.com", + "B007502357019.GBP.CORP.COM" + ); +} + +ATF_TC_WITHOUT_HEAD(semver_ordering); +ATF_TC_BODY(semver_ordering, tc) +{ + CHECK_ALL("2.6.20", "2.6.21"); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, strcmp_functionality); + ATF_TP_ADD_TC(tp, vers_ordering); + ATF_TP_ADD_TC(tp, natural_ordering); + ATF_TP_ADD_TC(tp, glibc_bug_9913); + ATF_TP_ADD_TC(tp, semver_ordering); + + return (atf_no_error()); +}