From 6f7abc3dcfe10bfc02e6fd549066c42891eb01d3 Mon Sep 17 00:00:00 2001 From: asomers Date: Wed, 6 Dec 2017 22:06:48 +0000 Subject: [PATCH] Optimize telldir(3) Currently each call to telldir() requires a malloc and adds an entry to a linked list which must be traversed on future telldir(), seekdir(), closedir(), and readdir() calls. Applications that call telldir() for every directory entry incur O(n^2) behavior in readdir() and O(n) in telldir() and closedir(). This optimization eliminates the malloc() and linked list in most cases by packing the relevant information into a single long. On 64-bit architectures msdosfs, NFS, tmpfs, UFS, and ZFS can all use the packed representation. On 32-bit architectures msdosfs, NFS, and UFS can use the packed representation, but ZFS and tmpfs can only use it for about the first 128 files per directory. Memory savings is about 50 bytes per telldir(3) call. Speedup for telldir()-heavy directory traversals is about 20-30x for one million files per directory. Reviewed by: kib, mav, mckusick MFC after: 3 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D13385 --- lib/libc/gen/telldir.c | 76 +++++++++---- lib/libc/gen/telldir.h | 52 +++++++-- lib/libc/tests/gen/Makefile | 5 +- lib/libc/tests/gen/dir2_test.c | 189 +++++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+), 30 deletions(-) create mode 100644 lib/libc/tests/gen/dir2_test.c diff --git a/lib/libc/gen/telldir.c b/lib/libc/gen/telldir.c index ddda01ae468b..29299388f436 100644 --- a/lib/libc/gen/telldir.c +++ b/lib/libc/gen/telldir.c @@ -54,11 +54,25 @@ __FBSDID("$FreeBSD$"); long telldir(DIR *dirp) { - struct ddloc *lp, *flp; - long idx; + struct ddloc_mem *lp, *flp; + union ddloc_packed ddloc; if (__isthreaded) _pthread_mutex_lock(&dirp->dd_lock); + /* + * Outline: + * 1) If the directory position fits in a packed structure, return that. + * 2) Otherwise, see if it's already been recorded in the linked list + * 3) Otherwise, malloc a new one + */ + if (dirp->dd_seek < (1ul << DD_SEEK_BITS) && + dirp->dd_loc < (1ul << DD_LOC_BITS)) { + ddloc.s.is_packed = 1; + ddloc.s.loc = dirp->dd_loc; + ddloc.s.seek = dirp->dd_seek; + goto out; + } + flp = NULL; LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) { if (lp->loc_seek == dirp->dd_seek) { @@ -72,7 +86,7 @@ telldir(DIR *dirp) } } if (lp == NULL) { - lp = malloc(sizeof(struct ddloc)); + lp = malloc(sizeof(struct ddloc_mem)); if (lp == NULL) { if (__isthreaded) _pthread_mutex_unlock(&dirp->dd_lock); @@ -86,10 +100,17 @@ telldir(DIR *dirp) else LIST_INSERT_HEAD(&dirp->dd_td->td_locq, lp, loc_lqe); } - idx = lp->loc_index; + ddloc.i.is_packed = 0; + /* + * Technically this assignment could overflow on 32-bit architectures, + * but we would get ENOMEM long before that happens. + */ + ddloc.i.index = lp->loc_index; + +out: if (__isthreaded) _pthread_mutex_unlock(&dirp->dd_lock); - return (idx); + return (ddloc.l); } /* @@ -99,34 +120,47 @@ telldir(DIR *dirp) void _seekdir(DIR *dirp, long loc) { - struct ddloc *lp; + struct ddloc_mem *lp; struct dirent *dp; + union ddloc_packed ddloc; + off_t loc_seek; + long loc_loc; - LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) { - if (lp->loc_index == loc) - break; + ddloc.l = loc; + + if (ddloc.s.is_packed) { + loc_seek = ddloc.s.seek; + loc_loc = ddloc.s.loc; + } else { + LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) { + if (lp->loc_index == ddloc.i.index) + break; + } + if (lp == NULL) + return; + + loc_seek = lp->loc_seek; + loc_loc = lp->loc_loc; } - if (lp == NULL) - return; - if (lp->loc_loc == dirp->dd_loc && lp->loc_seek == dirp->dd_seek) + if (loc_loc == dirp->dd_loc && loc_seek == dirp->dd_seek) return; /* If it's within the same chunk of data, don't bother reloading. */ - if (lp->loc_seek == dirp->dd_seek) { + if (loc_seek == dirp->dd_seek) { /* * If we go back to 0 don't make the next readdir * trigger a call to getdirentries(). */ - if (lp->loc_loc == 0) + if (loc_loc == 0) dirp->dd_flags |= __DTF_SKIPREAD; - dirp->dd_loc = lp->loc_loc; + dirp->dd_loc = loc_loc; return; } - (void) lseek(dirp->dd_fd, (off_t)lp->loc_seek, SEEK_SET); - dirp->dd_seek = lp->loc_seek; + (void) lseek(dirp->dd_fd, (off_t)loc_seek, SEEK_SET); + dirp->dd_seek = loc_seek; dirp->dd_loc = 0; dirp->dd_flags &= ~__DTF_SKIPREAD; /* current contents are invalid */ - while (dirp->dd_loc < lp->loc_loc) { + while (dirp->dd_loc < loc_loc) { dp = _readdir_unlocked(dirp, 0); if (dp == NULL) break; @@ -145,7 +179,7 @@ _seekdir(DIR *dirp, long loc) void _fixtelldir(DIR *dirp, long oldseek, long oldloc) { - struct ddloc *lp; + struct ddloc_mem *lp; lp = LIST_FIRST(&dirp->dd_td->td_locq); if (lp != NULL) { @@ -163,8 +197,8 @@ _fixtelldir(DIR *dirp, long oldseek, long oldloc) void _reclaim_telldir(DIR *dirp) { - struct ddloc *lp; - struct ddloc *templp; + struct ddloc_mem *lp; + struct ddloc_mem *templp; lp = LIST_FIRST(&dirp->dd_td->td_locq); while (lp != NULL) { diff --git a/lib/libc/gen/telldir.h b/lib/libc/gen/telldir.h index 9a25ddf5b21c..6880ec562a45 100644 --- a/lib/libc/gen/telldir.h +++ b/lib/libc/gen/telldir.h @@ -41,24 +41,62 @@ #include /* - * One of these structures is malloced to describe the current directory - * position each time telldir is called. It records the current magic - * cookie returned by getdirentries and the offset within the buffer - * associated with that return value. + * telldir will malloc one of these to describe the current directory position, + * if it can't fit that information into the packed structure below. It + * records the current magic cookie returned by getdirentries and the offset + * within the buffer associated with that return value. */ -struct ddloc { - LIST_ENTRY(ddloc) loc_lqe; /* entry in list */ +struct ddloc_mem { + LIST_ENTRY(ddloc_mem) loc_lqe; /* entry in list */ long loc_index; /* key associated with structure */ off_t loc_seek; /* magic cookie returned by getdirentries */ long loc_loc; /* offset of entry in buffer */ }; +#ifdef __LP64__ +#define DD_LOC_BITS 31 +#define DD_SEEK_BITS 32 +#define DD_INDEX_BITS 63 +#else +#define DD_LOC_BITS 12 +#define DD_SEEK_BITS 19 +#define DD_INDEX_BITS 31 +#endif + +/* + * This is the real type returned by telldir. telldir will prefer to return + * the packed type, if possible, or the malloced type otherwise. For msdosfs, + * UFS, and NFS, directory positions usually fit within the packed type. For + * ZFS and tmpfs, they usually fit within the packed type on 64-bit + * architectures. + */ +union ddloc_packed { + long l; /* Opaque type returned by telldir(3) */ + struct { + /* Identifies union type. Must be 0. */ + unsigned long is_packed:1; + /* Index into directory's linked list of ddloc_mem */ + unsigned long index:DD_INDEX_BITS; + } __packed i; + struct { + /* Identifies union type. Must be 1. */ + unsigned long is_packed:1; + /* offset of entry in buffer*/ + unsigned long loc:DD_LOC_BITS; + /* magic cookie returned by getdirentries */ + unsigned long seek:DD_SEEK_BITS; + } __packed s; +}; + +_Static_assert(sizeof(long) == sizeof(union ddloc_packed), + "packed telldir size mismatch!"); + /* * One of these structures is malloced for each DIR to record telldir * positions. */ struct _telldir { - LIST_HEAD(, ddloc) td_locq; /* list of locations */ + LIST_HEAD(, ddloc_mem) td_locq; /* list of locations */ long td_loccnt; /* index of entry for sequential readdir's */ }; diff --git a/lib/libc/tests/gen/Makefile b/lib/libc/tests/gen/Makefile index bd00cd2ddd6f..19af3f9fd140 100644 --- a/lib/libc/tests/gen/Makefile +++ b/lib/libc/tests/gen/Makefile @@ -3,6 +3,8 @@ .include ATF_TESTS_C+= arc4random_test +ATF_TESTS_C+= dir2_test +ATF_TESTS_C+= dlopen_empty_test ATF_TESTS_C+= fmtcheck2_test ATF_TESTS_C+= fmtmsg_test ATF_TESTS_C+= fnmatch2_test @@ -12,9 +14,8 @@ ATF_TESTS_C+= getmntinfo_test ATF_TESTS_C+= glob2_test ATF_TESTS_C+= popen_test ATF_TESTS_C+= posix_spawn_test -ATF_TESTS_C+= wordexp_test -ATF_TESTS_C+= dlopen_empty_test ATF_TESTS_C+= realpath2_test +ATF_TESTS_C+= wordexp_test # TODO: t_closefrom, t_cpuset, t_fmtcheck, t_randomid, # TODO: t_siginfo (fixes require further inspection) diff --git a/lib/libc/tests/gen/dir2_test.c b/lib/libc/tests/gen/dir2_test.c new file mode 100644 index 000000000000..83fdd9910922 --- /dev/null +++ b/lib/libc/tests/gen/dir2_test.c @@ -0,0 +1,189 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Test cases for operations on DIR objects: + * opendir, readdir, seekdir, telldir, closedir, etc + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include + +ATF_TC(telldir_after_seekdir); +ATF_TC_HEAD(telldir_after_seekdir, tc) +{ + + atf_tc_set_md_var(tc, "descr", "Calling telldir(3) after seekdir(3) " + "should return the argument passed to seekdir."); +} +ATF_TC_BODY(telldir_after_seekdir, tc) +{ + const int NUMFILES = 1000; + char template[] = "dXXXXXX"; + char *tmpdir; + int i, dirfd; + DIR *dirp; + struct dirent *de; + long beginning, middle, end, td; + + /* Create a temporary directory */ + tmpdir = mkdtemp(template); + ATF_REQUIRE_MSG(tmpdir != NULL, "mkdtemp failed"); + dirfd = open(tmpdir, O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dirfd > 0); + + /* + * Fill it with files. Must be > 128 to ensure that the directory + * can't fit within a single page + */ + for (i = 0; i < NUMFILES; i = i+1) { + int fd; + char filename[16]; + + snprintf(filename, sizeof(filename), "%d", i); + fd = openat(dirfd, filename, O_WRONLY | O_CREAT); + ATF_REQUIRE(fd > 0); + close(fd); + } + + /* Get some directory bookmarks in various locations */ + dirp = fdopendir(dirfd); + ATF_REQUIRE_MSG(dirfd >= 0, "fdopendir failed"); + beginning = telldir(dirp); + for (i = 0; i < NUMFILES / 2; i = i+1) { + de = readdir(dirp); + ATF_REQUIRE_MSG(de != NULL, "readdir failed"); + } + middle = telldir(dirp); + for (; i < NUMFILES - 1; i = i+1) { + de = readdir(dirp); + ATF_REQUIRE_MSG(de != NULL, "readdir failed"); + } + end = telldir(dirp); + + /* + * Seekdir to each bookmark, check the telldir after seekdir condition, + * and check that the bookmark is valid by reading another directory + * entry. + */ + + seekdir(dirp, beginning); + td = telldir(dirp); + ATF_CHECK_EQ(beginning, td); + ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index"); + + seekdir(dirp, middle); + td = telldir(dirp); + ATF_CHECK_EQ(middle, td); + ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index"); + + seekdir(dirp, end); + td = telldir(dirp); + ATF_CHECK_EQ(end, td); + ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index"); + + closedir(dirp); +} + +ATF_TC(telldir_at_end_of_block); +ATF_TC_HEAD(telldir_at_end_of_block, tc) +{ + + atf_tc_set_md_var(tc, "descr", "Calling telldir(3) after readdir(3) read the last entry in the block should return a valid location"); +} +ATF_TC_BODY(telldir_at_end_of_block, tc) +{ + /* For UFS and ZFS, blocks roll over at 128 directory entries. */ + const int NUMFILES = 129; + char template[] = "dXXXXXX"; + char *tmpdir; + int i, dirfd; + DIR *dirp; + struct dirent *de; + long td; + char last_filename[16]; + + /* Create a temporary directory */ + tmpdir = mkdtemp(template); + ATF_REQUIRE_MSG(tmpdir != NULL, "mkdtemp failed"); + dirfd = open(tmpdir, O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dirfd > 0); + + /* + * Fill it with files. Must be > 128 to ensure that the directory + * can't fit within a single page. The "-2" accounts for "." and ".." + */ + for (i = 0; i < NUMFILES - 2; i = i+1) { + int fd; + char filename[16]; + + snprintf(filename, sizeof(filename), "%d", i); + fd = openat(dirfd, filename, O_WRONLY | O_CREAT); + ATF_REQUIRE(fd > 0); + close(fd); + } + + /* Read all entries within the first page */ + dirp = fdopendir(dirfd); + ATF_REQUIRE_MSG(dirfd >= 0, "fdopendir failed"); + for (i = 0; i < NUMFILES - 1; i = i + 1) + ATF_REQUIRE_MSG(readdir(dirp) != NULL, "readdir failed"); + + /* Call telldir at the end of a page */ + td = telldir(dirp); + + /* Read the last entry */ + de = readdir(dirp); + ATF_REQUIRE_MSG(de != NULL, "readdir failed"); + strlcpy(last_filename, de->d_name, sizeof(last_filename)); + + /* Seek back to the bookmark. readdir() should return the last entry */ + seekdir(dirp, td); + de = readdir(dirp); + ATF_REQUIRE_STREQ_MSG(last_filename, de->d_name, + "seekdir went to the wrong directory position"); + + closedir(dirp); +} + + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, telldir_after_seekdir); + ATF_TP_ADD_TC(tp, telldir_at_end_of_block); + + return atf_no_error(); +}