Speed up hardlink detection by using a self-sizing hash
table rather than the old linear list search. On my "hardlink detection torture test", this reduced user time from 4700 seconds down to 4.2 seconds and wallclock time from 1:24:48 down to 1:08. (Yes, that's over one THOUSAND times reduction in user time. ;-) In the worst case, the new code doubles peak memory usage, though it could actually reduce memory usage in many cases. MFC after: 1 week PR: misc/42167, bin/51151
This commit is contained in:
parent
e501d96926
commit
231a973125
171
usr.bin/du/du.c
171
usr.bin/du/du.c
@ -95,7 +95,7 @@ struct ignentry {
|
|||||||
SLIST_ENTRY(ignentry) next;
|
SLIST_ENTRY(ignentry) next;
|
||||||
};
|
};
|
||||||
|
|
||||||
int linkchk(FTSENT *);
|
static int linkchk(FTSENT *);
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
void prthumanval(double);
|
void prthumanval(double);
|
||||||
unit_t unit_adjust(double *);
|
unit_t unit_adjust(double *);
|
||||||
@ -117,12 +117,12 @@ main(int argc, char *argv[])
|
|||||||
static char dot[] = ".";
|
static char dot[] = ".";
|
||||||
|
|
||||||
Hflag = Lflag = Pflag = aflag = sflag = dflag = cflag = hflag = 0;
|
Hflag = Lflag = Pflag = aflag = sflag = dflag = cflag = hflag = 0;
|
||||||
|
|
||||||
save = argv;
|
save = argv;
|
||||||
ftsoptions = 0;
|
ftsoptions = 0;
|
||||||
depth = INT_MAX;
|
depth = INT_MAX;
|
||||||
SLIST_INIT(&ignores);
|
SLIST_INIT(&ignores);
|
||||||
|
|
||||||
while ((ch = getopt(argc, argv, "HI:LPasd:chkrx")) != -1)
|
while ((ch = getopt(argc, argv, "HI:LPasd:chkrx")) != -1)
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case 'H':
|
case 'H':
|
||||||
@ -231,7 +231,7 @@ main(int argc, char *argv[])
|
|||||||
blocksize /= 512;
|
blocksize /= 512;
|
||||||
|
|
||||||
rval = 0;
|
rval = 0;
|
||||||
|
|
||||||
if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
|
if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
|
||||||
err(1, "fts_open");
|
err(1, "fts_open");
|
||||||
|
|
||||||
@ -247,7 +247,7 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
p->fts_parent->fts_number +=
|
p->fts_parent->fts_number +=
|
||||||
p->fts_number += p->fts_statp->st_blocks;
|
p->fts_number += p->fts_statp->st_blocks;
|
||||||
|
|
||||||
if (p->fts_level <= depth) {
|
if (p->fts_level <= depth) {
|
||||||
if (hflag) {
|
if (hflag) {
|
||||||
(void) prthumanval(howmany(p->fts_number, blocksize));
|
(void) prthumanval(howmany(p->fts_number, blocksize));
|
||||||
@ -273,7 +273,7 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
if (p->fts_statp->st_nlink > 1 && linkchk(p))
|
if (p->fts_statp->st_nlink > 1 && linkchk(p))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (listall || p->fts_level == 0) {
|
if (listall || p->fts_level == 0) {
|
||||||
if (hflag) {
|
if (hflag) {
|
||||||
(void) prthumanval(howmany(p->fts_statp->st_blocks,
|
(void) prthumanval(howmany(p->fts_statp->st_blocks,
|
||||||
@ -307,35 +307,144 @@ main(int argc, char *argv[])
|
|||||||
exit(rval);
|
exit(rval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
typedef struct _ID {
|
|
||||||
dev_t dev;
|
|
||||||
ino_t inode;
|
|
||||||
} ID;
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
linkchk(FTSENT *p)
|
linkchk(FTSENT *p)
|
||||||
{
|
{
|
||||||
static ID *files;
|
static const size_t links_hash_initial_size = 8192;
|
||||||
static int maxfiles, nfiles;
|
struct links_entry {
|
||||||
ID *fp, *start;
|
struct links_entry *next;
|
||||||
ino_t ino;
|
struct links_entry *previous;
|
||||||
dev_t dev;
|
int links;
|
||||||
|
dev_t dev;
|
||||||
|
ino_t ino;
|
||||||
|
};
|
||||||
|
static unsigned long number_entries;
|
||||||
|
static size_t number_buckets;
|
||||||
|
static struct links_entry **buckets;
|
||||||
|
static struct links_entry *free_list;
|
||||||
|
static char stop_allocating;
|
||||||
|
|
||||||
ino = p->fts_statp->st_ino;
|
struct links_entry *le, **new_buckets;
|
||||||
dev = p->fts_statp->st_dev;
|
struct stat *st;
|
||||||
if ((start = files) != NULL)
|
int hash;
|
||||||
for (fp = start + nfiles - 1; fp >= start; --fp)
|
size_t i, new_size;
|
||||||
if (ino == fp->inode && dev == fp->dev)
|
|
||||||
return (1);
|
|
||||||
|
|
||||||
if (nfiles == maxfiles && (files = realloc((char *)files,
|
st = p->fts_statp;
|
||||||
(u_int)(sizeof(ID) * (maxfiles += 128)))) == NULL)
|
|
||||||
errx(1, "can't allocate memory");
|
/* If necessary, initialize the hash table. */
|
||||||
files[nfiles].inode = ino;
|
if (buckets == NULL) {
|
||||||
files[nfiles].dev = dev;
|
number_buckets = links_hash_initial_size;
|
||||||
++nfiles;
|
buckets = malloc(number_buckets * sizeof(buckets[0]));
|
||||||
|
if (buckets == NULL)
|
||||||
|
err(1, "No memory for hardlink detection.");
|
||||||
|
for (i = 0; i < number_buckets; i++)
|
||||||
|
buckets[i] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the hash table is getting too full, enlarge it. */
|
||||||
|
if (number_entries > number_buckets * 10 && !stop_allocating) {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
new_size = number_buckets * 2;
|
||||||
|
new_buckets = malloc(new_size * sizeof(struct links_entry *));
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
/* Try releasing the free list to see if that helps. */
|
||||||
|
if (new_buckets == NULL && free_list != NULL) {
|
||||||
|
while (free_list != NULL) {
|
||||||
|
le = free_list;
|
||||||
|
free_list = le->next;
|
||||||
|
free(le);
|
||||||
|
}
|
||||||
|
new_buckets = malloc(new_size * sizeof(new_buckets[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_buckets == NULL) {
|
||||||
|
stop_allocating = 1;
|
||||||
|
warnc(ENOMEM, "No more memory for recording "
|
||||||
|
"hard links; Remaining hard links will be "
|
||||||
|
"counted as separate files.");
|
||||||
|
} else {
|
||||||
|
memset(new_buckets, 0,
|
||||||
|
new_size * sizeof(struct links_entry *));
|
||||||
|
for (i = 0; i < number_buckets; i++) {
|
||||||
|
while (buckets[i] != NULL) {
|
||||||
|
/* Remove entry from old bucket. */
|
||||||
|
le = buckets[i];
|
||||||
|
buckets[i] = le->next;
|
||||||
|
|
||||||
|
/* Add entry to new bucket. */
|
||||||
|
hash = (le->dev ^ le->ino) % new_size;
|
||||||
|
|
||||||
|
if (new_buckets[hash] != NULL)
|
||||||
|
new_buckets[hash]->previous =
|
||||||
|
le;
|
||||||
|
le->next = new_buckets[hash];
|
||||||
|
le->previous = NULL;
|
||||||
|
new_buckets[hash] = le;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(buckets);
|
||||||
|
buckets = new_buckets;
|
||||||
|
number_buckets = new_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to locate this entry in the hash table. */
|
||||||
|
hash = ( st->st_dev ^ st->st_ino ) % number_buckets;
|
||||||
|
for (le = buckets[hash]; le != NULL; le = le->next) {
|
||||||
|
if (le->dev == st->st_dev && le->ino == st->st_ino) {
|
||||||
|
/*
|
||||||
|
* Save memory by releasing an entry when we've seen
|
||||||
|
* all of its links.
|
||||||
|
*/
|
||||||
|
if (--le->links <= 0) {
|
||||||
|
if (le->previous != NULL)
|
||||||
|
le->previous->next = le->next;
|
||||||
|
if (le->next != NULL)
|
||||||
|
le->next->previous = le->previous;
|
||||||
|
if (buckets[hash] == le)
|
||||||
|
buckets[hash] = le->next;
|
||||||
|
number_entries--;
|
||||||
|
/* Recycle this node through the free list */
|
||||||
|
if (stop_allocating) {
|
||||||
|
free(le);
|
||||||
|
} else {
|
||||||
|
le->next = free_list;
|
||||||
|
free_list = le;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stop_allocating)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
/* Add this entry to the links cache. */
|
||||||
|
if (free_list != NULL) {
|
||||||
|
/* Pull a node from the free list if we can. */
|
||||||
|
le = free_list;
|
||||||
|
free_list = le->next;
|
||||||
|
} else
|
||||||
|
/* Malloc one if we have to. */
|
||||||
|
le = malloc(sizeof(struct links_entry));
|
||||||
|
if (le == NULL) {
|
||||||
|
stop_allocating = 1;
|
||||||
|
warnc(ENOMEM, "No more memory for recording "
|
||||||
|
"hard links; Remaining hard links will be counted "
|
||||||
|
"as separate files.");
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
le->dev = st->st_dev;
|
||||||
|
le->ino = st->st_ino;
|
||||||
|
le->links = st->st_nlink - 1;
|
||||||
|
number_entries++;
|
||||||
|
le->next = buckets[hash];
|
||||||
|
le->previous = NULL;
|
||||||
|
if (buckets[hash] != NULL)
|
||||||
|
buckets[hash]->previous = le;
|
||||||
|
buckets[hash] = le;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user