Optimize telldir(3)

Currently each call to telldir() requires a malloc and adds an entry to a
linked list which must be traversed on future telldir(), seekdir(),
closedir(), and readdir() calls. Applications that call telldir() for every
directory entry incur O(n^2) behavior in readdir() and O(n) in telldir() and
closedir().

This optimization eliminates the malloc() and linked list in most cases by
packing the relevant information into a single long. On 64-bit architectures
msdosfs, NFS, tmpfs, UFS, and ZFS can all use the packed representation.  On
32-bit architectures msdosfs, NFS, and UFS can use the packed
representation, but ZFS and tmpfs can only use it for about the first 128
files per directory.  Memory savings is about 50 bytes per telldir(3) call.
Speedup for telldir()-heavy directory traversals is about 20-30x for one
million files per directory.

Reviewed by:	kib, mav, mckusick
MFC after:	3 weeks
Sponsored by:	Spectra Logic Corp
Differential Revision:	https://reviews.freebsd.org/D13385
This commit is contained in:
asomers 2017-12-06 22:06:48 +00:00
parent b8b787a20a
commit 6f7abc3dcf
4 changed files with 292 additions and 30 deletions

View File

@ -54,11 +54,25 @@ __FBSDID("$FreeBSD$");
long
telldir(DIR *dirp)
{
struct ddloc *lp, *flp;
long idx;
struct ddloc_mem *lp, *flp;
union ddloc_packed ddloc;
if (__isthreaded)
_pthread_mutex_lock(&dirp->dd_lock);
/*
* Outline:
* 1) If the directory position fits in a packed structure, return that.
* 2) Otherwise, see if it's already been recorded in the linked list
* 3) Otherwise, malloc a new one
*/
if (dirp->dd_seek < (1ul << DD_SEEK_BITS) &&
dirp->dd_loc < (1ul << DD_LOC_BITS)) {
ddloc.s.is_packed = 1;
ddloc.s.loc = dirp->dd_loc;
ddloc.s.seek = dirp->dd_seek;
goto out;
}
flp = NULL;
LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) {
if (lp->loc_seek == dirp->dd_seek) {
@ -72,7 +86,7 @@ telldir(DIR *dirp)
}
}
if (lp == NULL) {
lp = malloc(sizeof(struct ddloc));
lp = malloc(sizeof(struct ddloc_mem));
if (lp == NULL) {
if (__isthreaded)
_pthread_mutex_unlock(&dirp->dd_lock);
@ -86,10 +100,17 @@ telldir(DIR *dirp)
else
LIST_INSERT_HEAD(&dirp->dd_td->td_locq, lp, loc_lqe);
}
idx = lp->loc_index;
ddloc.i.is_packed = 0;
/*
* Technically this assignment could overflow on 32-bit architectures,
* but we would get ENOMEM long before that happens.
*/
ddloc.i.index = lp->loc_index;
out:
if (__isthreaded)
_pthread_mutex_unlock(&dirp->dd_lock);
return (idx);
return (ddloc.l);
}
/*
@ -99,34 +120,47 @@ telldir(DIR *dirp)
void
_seekdir(DIR *dirp, long loc)
{
struct ddloc *lp;
struct ddloc_mem *lp;
struct dirent *dp;
union ddloc_packed ddloc;
off_t loc_seek;
long loc_loc;
LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) {
if (lp->loc_index == loc)
break;
ddloc.l = loc;
if (ddloc.s.is_packed) {
loc_seek = ddloc.s.seek;
loc_loc = ddloc.s.loc;
} else {
LIST_FOREACH(lp, &dirp->dd_td->td_locq, loc_lqe) {
if (lp->loc_index == ddloc.i.index)
break;
}
if (lp == NULL)
return;
loc_seek = lp->loc_seek;
loc_loc = lp->loc_loc;
}
if (lp == NULL)
return;
if (lp->loc_loc == dirp->dd_loc && lp->loc_seek == dirp->dd_seek)
if (loc_loc == dirp->dd_loc && loc_seek == dirp->dd_seek)
return;
/* If it's within the same chunk of data, don't bother reloading. */
if (lp->loc_seek == dirp->dd_seek) {
if (loc_seek == dirp->dd_seek) {
/*
* If we go back to 0 don't make the next readdir
* trigger a call to getdirentries().
*/
if (lp->loc_loc == 0)
if (loc_loc == 0)
dirp->dd_flags |= __DTF_SKIPREAD;
dirp->dd_loc = lp->loc_loc;
dirp->dd_loc = loc_loc;
return;
}
(void) lseek(dirp->dd_fd, (off_t)lp->loc_seek, SEEK_SET);
dirp->dd_seek = lp->loc_seek;
(void) lseek(dirp->dd_fd, (off_t)loc_seek, SEEK_SET);
dirp->dd_seek = loc_seek;
dirp->dd_loc = 0;
dirp->dd_flags &= ~__DTF_SKIPREAD; /* current contents are invalid */
while (dirp->dd_loc < lp->loc_loc) {
while (dirp->dd_loc < loc_loc) {
dp = _readdir_unlocked(dirp, 0);
if (dp == NULL)
break;
@ -145,7 +179,7 @@ _seekdir(DIR *dirp, long loc)
void
_fixtelldir(DIR *dirp, long oldseek, long oldloc)
{
struct ddloc *lp;
struct ddloc_mem *lp;
lp = LIST_FIRST(&dirp->dd_td->td_locq);
if (lp != NULL) {
@ -163,8 +197,8 @@ _fixtelldir(DIR *dirp, long oldseek, long oldloc)
void
_reclaim_telldir(DIR *dirp)
{
struct ddloc *lp;
struct ddloc *templp;
struct ddloc_mem *lp;
struct ddloc_mem *templp;
lp = LIST_FIRST(&dirp->dd_td->td_locq);
while (lp != NULL) {

View File

@ -41,24 +41,62 @@
#include <stdbool.h>
/*
* One of these structures is malloced to describe the current directory
* position each time telldir is called. It records the current magic
* cookie returned by getdirentries and the offset within the buffer
* associated with that return value.
* telldir will malloc one of these to describe the current directory position,
* if it can't fit that information into the packed structure below. It
* records the current magic cookie returned by getdirentries and the offset
* within the buffer associated with that return value.
*/
struct ddloc {
LIST_ENTRY(ddloc) loc_lqe; /* entry in list */
struct ddloc_mem {
LIST_ENTRY(ddloc_mem) loc_lqe; /* entry in list */
long loc_index; /* key associated with structure */
off_t loc_seek; /* magic cookie returned by getdirentries */
long loc_loc; /* offset of entry in buffer */
};
#ifdef __LP64__
#define DD_LOC_BITS 31
#define DD_SEEK_BITS 32
#define DD_INDEX_BITS 63
#else
#define DD_LOC_BITS 12
#define DD_SEEK_BITS 19
#define DD_INDEX_BITS 31
#endif
/*
* This is the real type returned by telldir. telldir will prefer to return
* the packed type, if possible, or the malloced type otherwise. For msdosfs,
* UFS, and NFS, directory positions usually fit within the packed type. For
* ZFS and tmpfs, they usually fit within the packed type on 64-bit
* architectures.
*/
union ddloc_packed {
long l; /* Opaque type returned by telldir(3) */
struct {
/* Identifies union type. Must be 0. */
unsigned long is_packed:1;
/* Index into directory's linked list of ddloc_mem */
unsigned long index:DD_INDEX_BITS;
} __packed i;
struct {
/* Identifies union type. Must be 1. */
unsigned long is_packed:1;
/* offset of entry in buffer*/
unsigned long loc:DD_LOC_BITS;
/* magic cookie returned by getdirentries */
unsigned long seek:DD_SEEK_BITS;
} __packed s;
};
_Static_assert(sizeof(long) == sizeof(union ddloc_packed),
"packed telldir size mismatch!");
/*
* One of these structures is malloced for each DIR to record telldir
* positions.
*/
struct _telldir {
LIST_HEAD(, ddloc) td_locq; /* list of locations */
LIST_HEAD(, ddloc_mem) td_locq; /* list of locations */
long td_loccnt; /* index of entry for sequential readdir's */
};

View File

@ -3,6 +3,8 @@
.include <bsd.own.mk>
ATF_TESTS_C+= arc4random_test
ATF_TESTS_C+= dir2_test
ATF_TESTS_C+= dlopen_empty_test
ATF_TESTS_C+= fmtcheck2_test
ATF_TESTS_C+= fmtmsg_test
ATF_TESTS_C+= fnmatch2_test
@ -12,9 +14,8 @@ ATF_TESTS_C+= getmntinfo_test
ATF_TESTS_C+= glob2_test
ATF_TESTS_C+= popen_test
ATF_TESTS_C+= posix_spawn_test
ATF_TESTS_C+= wordexp_test
ATF_TESTS_C+= dlopen_empty_test
ATF_TESTS_C+= realpath2_test
ATF_TESTS_C+= wordexp_test
# TODO: t_closefrom, t_cpuset, t_fmtcheck, t_randomid,
# TODO: t_siginfo (fixes require further inspection)

View File

@ -0,0 +1,189 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2017 Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Test cases for operations on DIR objects:
* opendir, readdir, seekdir, telldir, closedir, etc
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <atf-c.h>
ATF_TC(telldir_after_seekdir);
ATF_TC_HEAD(telldir_after_seekdir, tc)
{
atf_tc_set_md_var(tc, "descr", "Calling telldir(3) after seekdir(3) "
"should return the argument passed to seekdir.");
}
ATF_TC_BODY(telldir_after_seekdir, tc)
{
const int NUMFILES = 1000;
char template[] = "dXXXXXX";
char *tmpdir;
int i, dirfd;
DIR *dirp;
struct dirent *de;
long beginning, middle, end, td;
/* Create a temporary directory */
tmpdir = mkdtemp(template);
ATF_REQUIRE_MSG(tmpdir != NULL, "mkdtemp failed");
dirfd = open(tmpdir, O_RDONLY | O_DIRECTORY);
ATF_REQUIRE(dirfd > 0);
/*
* Fill it with files. Must be > 128 to ensure that the directory
* can't fit within a single page
*/
for (i = 0; i < NUMFILES; i = i+1) {
int fd;
char filename[16];
snprintf(filename, sizeof(filename), "%d", i);
fd = openat(dirfd, filename, O_WRONLY | O_CREAT);
ATF_REQUIRE(fd > 0);
close(fd);
}
/* Get some directory bookmarks in various locations */
dirp = fdopendir(dirfd);
ATF_REQUIRE_MSG(dirfd >= 0, "fdopendir failed");
beginning = telldir(dirp);
for (i = 0; i < NUMFILES / 2; i = i+1) {
de = readdir(dirp);
ATF_REQUIRE_MSG(de != NULL, "readdir failed");
}
middle = telldir(dirp);
for (; i < NUMFILES - 1; i = i+1) {
de = readdir(dirp);
ATF_REQUIRE_MSG(de != NULL, "readdir failed");
}
end = telldir(dirp);
/*
* Seekdir to each bookmark, check the telldir after seekdir condition,
* and check that the bookmark is valid by reading another directory
* entry.
*/
seekdir(dirp, beginning);
td = telldir(dirp);
ATF_CHECK_EQ(beginning, td);
ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index");
seekdir(dirp, middle);
td = telldir(dirp);
ATF_CHECK_EQ(middle, td);
ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index");
seekdir(dirp, end);
td = telldir(dirp);
ATF_CHECK_EQ(end, td);
ATF_REQUIRE_MSG(NULL != readdir(dirp), "invalid directory index");
closedir(dirp);
}
ATF_TC(telldir_at_end_of_block);
ATF_TC_HEAD(telldir_at_end_of_block, tc)
{
atf_tc_set_md_var(tc, "descr", "Calling telldir(3) after readdir(3) read the last entry in the block should return a valid location");
}
ATF_TC_BODY(telldir_at_end_of_block, tc)
{
/* For UFS and ZFS, blocks roll over at 128 directory entries. */
const int NUMFILES = 129;
char template[] = "dXXXXXX";
char *tmpdir;
int i, dirfd;
DIR *dirp;
struct dirent *de;
long td;
char last_filename[16];
/* Create a temporary directory */
tmpdir = mkdtemp(template);
ATF_REQUIRE_MSG(tmpdir != NULL, "mkdtemp failed");
dirfd = open(tmpdir, O_RDONLY | O_DIRECTORY);
ATF_REQUIRE(dirfd > 0);
/*
* Fill it with files. Must be > 128 to ensure that the directory
* can't fit within a single page. The "-2" accounts for "." and ".."
*/
for (i = 0; i < NUMFILES - 2; i = i+1) {
int fd;
char filename[16];
snprintf(filename, sizeof(filename), "%d", i);
fd = openat(dirfd, filename, O_WRONLY | O_CREAT);
ATF_REQUIRE(fd > 0);
close(fd);
}
/* Read all entries within the first page */
dirp = fdopendir(dirfd);
ATF_REQUIRE_MSG(dirfd >= 0, "fdopendir failed");
for (i = 0; i < NUMFILES - 1; i = i + 1)
ATF_REQUIRE_MSG(readdir(dirp) != NULL, "readdir failed");
/* Call telldir at the end of a page */
td = telldir(dirp);
/* Read the last entry */
de = readdir(dirp);
ATF_REQUIRE_MSG(de != NULL, "readdir failed");
strlcpy(last_filename, de->d_name, sizeof(last_filename));
/* Seek back to the bookmark. readdir() should return the last entry */
seekdir(dirp, td);
de = readdir(dirp);
ATF_REQUIRE_STREQ_MSG(last_filename, de->d_name,
"seekdir went to the wrong directory position");
closedir(dirp);
}
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, telldir_after_seekdir);
ATF_TP_ADD_TC(tp, telldir_at_end_of_block);
return atf_no_error();
}