From 99ca5b8804cbf2070494a76a700801c9842399ad Mon Sep 17 00:00:00 2001 From: Peter Edwards Date: Sat, 8 May 2004 15:09:02 +0000 Subject: [PATCH] The FTS_NOSTAT option is an optimisation that reduces the number of stat(2) calls by keeping an eye of the number of links a directory has. It assumes that each subdirectory will have a hard link to its parent, to represent the ".." node, and stops calling stat(2) when all links are accounted for in a given directory. This assumption is really only valid for UNIX-like filesystems: A concrete example is NTFS. The NTFS "i-node" does contain a link count, but most/all directories have a link count between 0 and 2 inclusive. The end result is that find on an NTFS volume won't actually traverse the entire hierarchy of the directories passed to it. (Those with a link count of two are not traversed at all) The fix checks the "UFSness" of the filesystem before enabling the optimisation. Reviewed By: Tim Kientzle (kientzle@) --- include/fts.h | 3 ++ lib/libc/gen/fts-compat.c | 78 +++++++++++++++++++++++++++++++++++++-- lib/libc/gen/fts-compat.h | 3 ++ lib/libc/gen/fts.c | 78 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 156 insertions(+), 6 deletions(-) diff --git a/include/fts.h b/include/fts.h index 09c4600a1072..dbf82c99167f 100644 --- a/include/fts.h +++ b/include/fts.h @@ -37,6 +37,8 @@ #ifndef _FTS_H_ #define _FTS_H_ +struct _fts_private; /* implementation data */ + typedef struct { struct _ftsent *fts_cur; /* current node */ struct _ftsent *fts_child; /* linked list of children */ @@ -63,6 +65,7 @@ typedef struct { #define FTS_STOP 0x200 /* (private) unrecoverable error */ int fts_options; /* fts_open options, global flags */ void *fts_clientptr; /* thunk for sort function */ + struct _fts_private *fts_priv; /* Implementation data */ } FTS; typedef struct _ftsent { diff --git a/lib/libc/gen/fts-compat.c b/lib/libc/gen/fts-compat.c index e8ebaf321a59..2081f442c9fe 100644 --- a/lib/libc/gen/fts-compat.c +++ b/lib/libc/gen/fts-compat.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -63,6 +64,7 @@ static int fts_palloc(FTS *, size_t); static FTSENT *fts_sort(FTS *, FTSENT *, int); static u_short fts_stat(FTS *, FTSENT *, int); static int fts_safe_changedir(FTS *, FTSENT *, int, char *); +static int fts_ufslinks(FTS *sp, const FTSENT *ent); #define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) @@ -77,12 +79,43 @@ static int fts_safe_changedir(FTS *, FTSENT *, int, char *); #define BNAMES 2 /* fts_children, names only */ #define BREAD 3 /* fts_read */ +/* + * Internal representation of FTS, including extra implementation details. + * The FTS returned from fts_open is ftsp_fts from this structure, and it's + * fts_priv in turn points back to this internal version. i.e. for a given + * fts_private *priv: &priv->fts_fts == (FTS *)f == priv->fts_fts.fts_priv + */ +struct _fts_private { + FTS ftsp_fts; + struct statfs ftsp_statfs; + dev_t ftsp_dev; + int ftsp_linksreliable; +}; + +/* + * The "FTS_NOSTAT" option can avoid a lot of calls to stat(2) if it knows + * that a directory could not possibly have subdirectories. This is decided + * by looking at the link count: A subdirectory would increment its parent's + * link count by virtue of its own ".." entry. + * This assumption only holds for UFS-like filesystems that implement links + * and directories this way, so we must punt for others. + */ + +static const char *ufslike_filesystems[] = { + "ufs", + "nfs", + "nfs4", + "ext2fs", + 0 +}; + FTS * fts_open(argv, options, compar) char * const *argv; int options; int (*compar)(const FTSENT * const *, const FTSENT * const *); { + struct _fts_private *priv; FTS *sp; FTSENT *p, *root; int nitems; @@ -96,11 +129,13 @@ fts_open(argv, options, compar) } /* Allocate/initialize the stream */ - if ((sp = malloc(sizeof(FTS))) == NULL) + if ((priv = malloc(sizeof(struct _fts_private))) == NULL) return (NULL); - memset(sp, 0, sizeof(FTS)); + memset(priv, 0, sizeof(struct _fts_private)); + sp = &priv->ftsp_fts; sp->fts_compar = compar; sp->fts_options = options; + sp->fts_priv = priv; /* Shush, GCC. */ tmp = NULL; @@ -637,7 +672,10 @@ fts_build(sp, type) /* Be quiet about nostat, GCC. */ nostat = 0; } else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) { - nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); + if (fts_ufslinks(sp, cur)) + nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); + else + nlinks = -1; nostat = 1; } else { nlinks = -1; @@ -1154,3 +1192,37 @@ fts_safe_changedir(sp, p, fd, path) errno = oerrno; return (ret); } + +/* + * Check if the filesystem for "ent" has UFS-style links. + */ +static int +fts_ufslinks(FTS *sp, const FTSENT *ent) +{ + struct _fts_private *priv; + const char **cpp; + + priv = sp->fts_priv; + /* + * If this node's device is different from the previous, grab + * the filesystem information, and decide on the reliability + * of the link information from this filesystem for stat(2) + * avoidance. + */ + if (priv->ftsp_dev != ent->fts_dev) { + if (statfs(ent->fts_path, &priv->ftsp_statfs) != -1) { + priv->ftsp_dev = ent->fts_dev; + priv->ftsp_linksreliable = 0; + for (cpp = ufslike_filesystems; *cpp; cpp++) { + if (strcmp(priv->ftsp_statfs.f_fstypename, + *cpp) == 0) { + priv->ftsp_linksreliable = 1; + break; + } + } + } else { + priv->ftsp_linksreliable = 0; + } + } + return priv->ftsp_linksreliable; +} diff --git a/lib/libc/gen/fts-compat.h b/lib/libc/gen/fts-compat.h index 09c4600a1072..dbf82c99167f 100644 --- a/lib/libc/gen/fts-compat.h +++ b/lib/libc/gen/fts-compat.h @@ -37,6 +37,8 @@ #ifndef _FTS_H_ #define _FTS_H_ +struct _fts_private; /* implementation data */ + typedef struct { struct _ftsent *fts_cur; /* current node */ struct _ftsent *fts_child; /* linked list of children */ @@ -63,6 +65,7 @@ typedef struct { #define FTS_STOP 0x200 /* (private) unrecoverable error */ int fts_options; /* fts_open options, global flags */ void *fts_clientptr; /* thunk for sort function */ + struct _fts_private *fts_priv; /* Implementation data */ } FTS; typedef struct _ftsent { diff --git a/lib/libc/gen/fts.c b/lib/libc/gen/fts.c index e8ebaf321a59..2081f442c9fe 100644 --- a/lib/libc/gen/fts.c +++ b/lib/libc/gen/fts.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -63,6 +64,7 @@ static int fts_palloc(FTS *, size_t); static FTSENT *fts_sort(FTS *, FTSENT *, int); static u_short fts_stat(FTS *, FTSENT *, int); static int fts_safe_changedir(FTS *, FTSENT *, int, char *); +static int fts_ufslinks(FTS *sp, const FTSENT *ent); #define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) @@ -77,12 +79,43 @@ static int fts_safe_changedir(FTS *, FTSENT *, int, char *); #define BNAMES 2 /* fts_children, names only */ #define BREAD 3 /* fts_read */ +/* + * Internal representation of FTS, including extra implementation details. + * The FTS returned from fts_open is ftsp_fts from this structure, and it's + * fts_priv in turn points back to this internal version. i.e. for a given + * fts_private *priv: &priv->fts_fts == (FTS *)f == priv->fts_fts.fts_priv + */ +struct _fts_private { + FTS ftsp_fts; + struct statfs ftsp_statfs; + dev_t ftsp_dev; + int ftsp_linksreliable; +}; + +/* + * The "FTS_NOSTAT" option can avoid a lot of calls to stat(2) if it knows + * that a directory could not possibly have subdirectories. This is decided + * by looking at the link count: A subdirectory would increment its parent's + * link count by virtue of its own ".." entry. + * This assumption only holds for UFS-like filesystems that implement links + * and directories this way, so we must punt for others. + */ + +static const char *ufslike_filesystems[] = { + "ufs", + "nfs", + "nfs4", + "ext2fs", + 0 +}; + FTS * fts_open(argv, options, compar) char * const *argv; int options; int (*compar)(const FTSENT * const *, const FTSENT * const *); { + struct _fts_private *priv; FTS *sp; FTSENT *p, *root; int nitems; @@ -96,11 +129,13 @@ fts_open(argv, options, compar) } /* Allocate/initialize the stream */ - if ((sp = malloc(sizeof(FTS))) == NULL) + if ((priv = malloc(sizeof(struct _fts_private))) == NULL) return (NULL); - memset(sp, 0, sizeof(FTS)); + memset(priv, 0, sizeof(struct _fts_private)); + sp = &priv->ftsp_fts; sp->fts_compar = compar; sp->fts_options = options; + sp->fts_priv = priv; /* Shush, GCC. */ tmp = NULL; @@ -637,7 +672,10 @@ fts_build(sp, type) /* Be quiet about nostat, GCC. */ nostat = 0; } else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) { - nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); + if (fts_ufslinks(sp, cur)) + nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); + else + nlinks = -1; nostat = 1; } else { nlinks = -1; @@ -1154,3 +1192,37 @@ fts_safe_changedir(sp, p, fd, path) errno = oerrno; return (ret); } + +/* + * Check if the filesystem for "ent" has UFS-style links. + */ +static int +fts_ufslinks(FTS *sp, const FTSENT *ent) +{ + struct _fts_private *priv; + const char **cpp; + + priv = sp->fts_priv; + /* + * If this node's device is different from the previous, grab + * the filesystem information, and decide on the reliability + * of the link information from this filesystem for stat(2) + * avoidance. + */ + if (priv->ftsp_dev != ent->fts_dev) { + if (statfs(ent->fts_path, &priv->ftsp_statfs) != -1) { + priv->ftsp_dev = ent->fts_dev; + priv->ftsp_linksreliable = 0; + for (cpp = ufslike_filesystems; *cpp; cpp++) { + if (strcmp(priv->ftsp_statfs.f_fstypename, + *cpp) == 0) { + priv->ftsp_linksreliable = 1; + break; + } + } + } else { + priv->ftsp_linksreliable = 0; + } + } + return priv->ftsp_linksreliable; +}