8fa03d08ca
Pointed out by: bf1783 at gmail Approved by: np (cxgb), kientzle (tar, etc.), philip (mentor)
603 lines
14 KiB
C
603 lines
14 KiB
C
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
/*-
 * This is a new directory-walking system that addresses a number
 * of problems I've had with fts(3).  In particular, it has no
 * pathname-length limits (other than the size of 'int'), handles
 * deep logical traversals, uses considerably less memory, and has
 * an opaque interface (easier to modify in the future).
 *
 * Internally, it keeps a single list of "tree_entry" items that
 * represent filesystem objects that require further attention.
 * Non-directories are not kept in memory: they are pulled from
 * readdir(), returned to the client, then freed as soon as possible.
 * Any directory entry to be traversed gets pushed onto the stack.
 *
 * There is surprisingly little information that needs to be kept for
 * each item on the stack.  Just the name, depth (represented here as the
 * string length of the parent directory's pathname), and some markers
 * indicating how to get back to the parent (via chdir("..") for a
 * regular dir or via fchdir(2) for a symlink).
 */
|
|
#include "bsdtar_platform.h"
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
#include <sys/stat.h>
|
|
#endif
|
|
#ifdef HAVE_DIRENT_H
|
|
#include <dirent.h>
|
|
#endif
|
|
#ifdef HAVE_ERRNO_H
|
|
#include <errno.h>
|
|
#endif
|
|
#ifdef HAVE_FCNTL_H
|
|
#include <fcntl.h>
|
|
#endif
|
|
#ifdef HAVE_STDLIB_H
|
|
#include <stdlib.h>
|
|
#endif
|
|
#ifdef HAVE_STRING_H
|
|
#include <string.h>
|
|
#endif
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include "tree.h"
|
|
|
|
/*
 * TODO:
 *    1) Loop checking.
 *    2) Arbitrary logical traversals by closing/reopening intermediate fds.
 */
|
|
|
|
/*
 * One pending directory on the traversal stack.
 *
 * Entries are created by tree_push() and released by tree_pop().
 */
struct tree_entry {
	/* Next (older) entry on the stack; NULL at the bottom. */
	struct tree_entry	*next;
	/* Followed by tree_pop() when the popped entry is the current one. */
	struct tree_entry	*parent;
	/* Heap copy of the name this directory was pushed under. */
	char			*name;
	/* Value of tree.dirname_length when the entry was pushed. */
	size_t			 dirname_length;
	/* NOTE(review): dev/ino are not referenced in this file;
	 * presumably reserved for loop checking -- confirm. */
	dev_t			 dev;
	ino_t			 ino;
#ifdef HAVE_FCHDIR
	/* For directory links: fd of the directory to fchdir() back to;
	 * -1 when none is held. */
	int			 fd;
#elif defined(_WIN32) && !defined(__CYGWIN__)
	/* For directory links: saved working directory to chdir() back to. */
	char			*fullpath;
#else
#error fchdir function required.
#endif
	/* Bitmap of the isDir/isDirLink/needsPreVisit/needsPostVisit bits. */
	int			 flags;
};
|
|
|
|
/*
 * Bits stored in tree_entry.flags.
 */
#define	isDir		(1 << 0) /* Entry is a real directory. */
#define	isDirLink	(1 << 1) /* Entry is a symlink that leads to a directory. */
#define	needsPreVisit	(1 << 2) /* Entry has not been descended into yet. */
#define	needsPostVisit	(1 << 3) /* Entry still has to be ascended out of. */
|
|
|
|
/*
 * Private state of one traversal; clients only ever hold a pointer to it.
 */
struct tree {
	/* Directories still requiring attention; top of stack first. */
	struct tree_entry	*stack;
	/* Entry most recently set up for a visit by tree_next(). */
	struct tree_entry	*current;
	/* Directory currently being read, or NULL. */
	DIR			*d;
#ifdef HAVE_FCHDIR
	/* Working directory at tree_open() time, restored by tree_close(). */
	int			 initialDirFd;
#elif defined(_WIN32) && !defined(__CYGWIN__)
	/* Working directory at tree_open() time, restored by tree_close(). */
	char			*initialDir;
#endif
	/* Bitmap of the needsReturn/hasStat/hasLstat bits. */
	int			 flags;
	/* Last value returned by tree_next(). */
	int			 visit_type;
	/* Error code from last failed operation. */
	int			 tree_errno;

	/* Full pathname of the current entry (heap, grown on demand). */
	char			*buff;
	/* Name used to access the current entry from the working dir. */
	const char		*basename;
	/* Allocated size of buff. */
	size_t			 buff_length;
	/* Length of the full pathname held in buff. */
	size_t			 path_length;
	/* Length of the directory prefix within buff. */
	size_t			 dirname_length;

	/* Number of directories descended into so far and not yet left. */
	int			 depth;
	/* Count of directory links currently holding a way back. */
	int			 openCount;
	/* Largest value openCount has reached. */
	int			 maxOpenCount;

	/* Cached lstat() and stat() results; valid per hasLstat/hasStat. */
	struct stat		 lst;
	struct stat		 st;
};
|
|
|
|
/*
 * Bits stored in tree.flags.
 */
#define	needsReturn	(1 << 3) /* The initial entry has not been handed out yet. */
#define	hasStat		(1 << 4) /* tree.st holds valid stat() data. */
#define	hasLstat	(1 << 5) /* tree.lst holds valid lstat() data. */
|
|
|
|
|
|
/*
 * D_NAMELEN(dp): length of the name held in the directory entry `dp`
 * (a struct dirent pointer).  Both expansions are fully parenthesized
 * so the macro can be used safely inside larger expressions.
 */
#ifdef HAVE_DIRENT_D_NAMLEN
/* BSD extension; avoids need for a strlen() call. */
#define	D_NAMELEN(dp)	((dp)->d_namlen)
#else
#define	D_NAMELEN(dp)	(strlen((dp)->d_name))
#endif
|
|
|
|
#if 0
#include <stdio.h>
/*
 * Debugging aid (compiled out): print the traversal state held in `t`
 * to the stream `out`, one line per stack entry.  Entries that no
 * longer need a pre-visit are marked with " *"; the current entry is
 * marked with " (current)".
 */
void
tree_dump(struct tree *t, FILE *out)
{
	struct tree_entry *te;

	fprintf(out, "\tdepth: %d\n", t->depth);
	fprintf(out, "\tbuff: %s\n", t->buff);
	/*
	 * Flush the stream we actually wrote the label to, so the label
	 * is emitted before the child process prints the directory.
	 */
	fprintf(out, "\tpwd: "); fflush(out); system("pwd");
	fprintf(out, "\taccess: %s\n", t->basename);
	fprintf(out, "\tstack:\n");
	for (te = t->stack; te != NULL; te = te->next) {
		fprintf(out, "\t\tte->name: %s%s%s\n", te->name,
		    te->flags & needsPreVisit ? "" : " *",
		    t->current == te ? " (current)" : "");
	}
}
#endif
|
|
|
|
/*
|
|
* Add a directory path to the current stack.
|
|
*/
|
|
static void
|
|
tree_push(struct tree *t, const char *path)
|
|
{
|
|
struct tree_entry *te;
|
|
|
|
te = malloc(sizeof(*te));
|
|
memset(te, 0, sizeof(*te));
|
|
te->next = t->stack;
|
|
t->stack = te;
|
|
#ifdef HAVE_FCHDIR
|
|
te->fd = -1;
|
|
#elif defined(_WIN32) && !defined(__CYGWIN__)
|
|
te->fullpath = NULL;
|
|
#endif
|
|
te->name = strdup(path);
|
|
te->flags = needsPreVisit | needsPostVisit;
|
|
te->dirname_length = t->dirname_length;
|
|
}
|
|
|
|
/*
|
|
* Append a name to the current path.
|
|
*/
|
|
static void
|
|
tree_append(struct tree *t, const char *name, size_t name_length)
|
|
{
|
|
char *p;
|
|
|
|
if (t->buff != NULL)
|
|
t->buff[t->dirname_length] = '\0';
|
|
/* Strip trailing '/' from name, unless entire name is "/". */
|
|
while (name_length > 1 && name[name_length - 1] == '/')
|
|
name_length--;
|
|
|
|
/* Resize pathname buffer as needed. */
|
|
while (name_length + 1 + t->dirname_length >= t->buff_length) {
|
|
t->buff_length *= 2;
|
|
if (t->buff_length < 1024)
|
|
t->buff_length = 1024;
|
|
t->buff = realloc(t->buff, t->buff_length);
|
|
}
|
|
p = t->buff + t->dirname_length;
|
|
t->path_length = t->dirname_length + name_length;
|
|
/* Add a separating '/' if it's needed. */
|
|
if (t->dirname_length > 0 && p[-1] != '/') {
|
|
*p++ = '/';
|
|
t->path_length ++;
|
|
}
|
|
strncpy(p, name, name_length);
|
|
p[name_length] = '\0';
|
|
t->basename = p;
|
|
}
|
|
|
|
/*
|
|
* Open a directory tree for traversal.
|
|
*/
|
|
struct tree *
|
|
tree_open(const char *path)
|
|
{
|
|
struct tree *t;
|
|
|
|
t = malloc(sizeof(*t));
|
|
memset(t, 0, sizeof(*t));
|
|
tree_append(t, path, strlen(path));
|
|
#ifdef HAVE_FCHDIR
|
|
t->initialDirFd = open(".", O_RDONLY);
|
|
#elif defined(_WIN32) && !defined(__CYGWIN__)
|
|
t->initialDir = getcwd(NULL, 0);
|
|
#endif
|
|
/*
|
|
* During most of the traversal, items are set up and then
|
|
* returned immediately from tree_next(). That doesn't work
|
|
* for the very first entry, so we set a flag for this special
|
|
* case.
|
|
*/
|
|
t->flags = needsReturn;
|
|
return (t);
|
|
}
|
|
|
|
/*
|
|
* We've finished a directory; ascend back to the parent.
|
|
*/
|
|
static int
|
|
tree_ascend(struct tree *t)
|
|
{
|
|
struct tree_entry *te;
|
|
int r = 0;
|
|
|
|
te = t->stack;
|
|
t->depth--;
|
|
if (te->flags & isDirLink) {
|
|
#ifdef HAVE_FCHDIR
|
|
if (fchdir(te->fd) != 0) {
|
|
t->tree_errno = errno;
|
|
r = TREE_ERROR_FATAL;
|
|
}
|
|
close(te->fd);
|
|
#elif defined(_WIN32) && !defined(__CYGWIN__)
|
|
if (chdir(te->fullpath) != 0) {
|
|
t->tree_errno = errno;
|
|
r = TREE_ERROR_FATAL;
|
|
}
|
|
free(te->fullpath);
|
|
te->fullpath = NULL;
|
|
#endif
|
|
t->openCount--;
|
|
} else {
|
|
if (chdir("..") != 0) {
|
|
t->tree_errno = errno;
|
|
r = TREE_ERROR_FATAL;
|
|
}
|
|
}
|
|
return (r);
|
|
}
|
|
|
|
/*
|
|
* Pop the working stack.
|
|
*/
|
|
static void
|
|
tree_pop(struct tree *t)
|
|
{
|
|
struct tree_entry *te;
|
|
|
|
t->buff[t->dirname_length] = '\0';
|
|
if (t->stack == t->current && t->current != NULL)
|
|
t->current = t->current->parent;
|
|
te = t->stack;
|
|
t->stack = te->next;
|
|
t->dirname_length = te->dirname_length;
|
|
t->basename = t->buff + t->dirname_length;
|
|
/* Special case: starting dir doesn't skip leading '/'. */
|
|
if (t->dirname_length > 0)
|
|
t->basename++;
|
|
free(te->name);
|
|
free(te);
|
|
}
|
|
|
|
/*
|
|
* Get the next item in the tree traversal.
|
|
*/
|
|
int
|
|
tree_next(struct tree *t)
|
|
{
|
|
struct dirent *de = NULL;
|
|
int r;
|
|
|
|
/* If we're called again after a fatal error, that's an API
|
|
* violation. Just crash now. */
|
|
if (t->visit_type == TREE_ERROR_FATAL) {
|
|
const char *msg = "Unable to continue traversing"
|
|
" directory hierarchy after a fatal error.";
|
|
write(2, msg, strlen(msg));
|
|
*(int *)0 = 1; /* Deliberate SEGV; NULL pointer dereference. */
|
|
exit(1); /* In case the SEGV didn't work. */
|
|
}
|
|
|
|
/* Handle the startup case by returning the initial entry. */
|
|
if (t->flags & needsReturn) {
|
|
t->flags &= ~needsReturn;
|
|
return (t->visit_type = TREE_REGULAR);
|
|
}
|
|
|
|
while (t->stack != NULL) {
|
|
/* If there's an open dir, get the next entry from there. */
|
|
while (t->d != NULL) {
|
|
de = readdir(t->d);
|
|
if (de == NULL) {
|
|
closedir(t->d);
|
|
t->d = NULL;
|
|
} else if (de->d_name[0] == '.'
|
|
&& de->d_name[1] == '\0') {
|
|
/* Skip '.' */
|
|
} else if (de->d_name[0] == '.'
|
|
&& de->d_name[1] == '.'
|
|
&& de->d_name[2] == '\0') {
|
|
/* Skip '..' */
|
|
} else {
|
|
/*
|
|
* Append the path to the current path
|
|
* and return it.
|
|
*/
|
|
tree_append(t, de->d_name, D_NAMELEN(de));
|
|
t->flags &= ~hasLstat;
|
|
t->flags &= ~hasStat;
|
|
return (t->visit_type = TREE_REGULAR);
|
|
}
|
|
}
|
|
|
|
/* If the current dir needs to be visited, set it up. */
|
|
if (t->stack->flags & needsPreVisit) {
|
|
t->current = t->stack;
|
|
tree_append(t, t->stack->name, strlen(t->stack->name));
|
|
t->stack->flags &= ~needsPreVisit;
|
|
/* If it is a link, set up fd for the ascent. */
|
|
if (t->stack->flags & isDirLink) {
|
|
#ifdef HAVE_FCHDIR
|
|
t->stack->fd = open(".", O_RDONLY);
|
|
#elif defined(_WIN32) && !defined(__CYGWIN__)
|
|
t->stack->fullpath = getcwd(NULL, 0);
|
|
#endif
|
|
t->openCount++;
|
|
if (t->openCount > t->maxOpenCount)
|
|
t->maxOpenCount = t->openCount;
|
|
}
|
|
t->dirname_length = t->path_length;
|
|
if (chdir(t->stack->name) != 0) {
|
|
/* chdir() failed; return error */
|
|
tree_pop(t);
|
|
t->tree_errno = errno;
|
|
return (t->visit_type = TREE_ERROR_DIR);
|
|
}
|
|
t->depth++;
|
|
t->d = opendir(".");
|
|
if (t->d == NULL) {
|
|
r = tree_ascend(t); /* Undo "chdir" */
|
|
tree_pop(t);
|
|
t->tree_errno = errno;
|
|
t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
|
|
return (t->visit_type);
|
|
}
|
|
t->flags &= ~hasLstat;
|
|
t->flags &= ~hasStat;
|
|
t->basename = ".";
|
|
return (t->visit_type = TREE_POSTDESCENT);
|
|
}
|
|
|
|
/* We've done everything necessary for the top stack entry. */
|
|
if (t->stack->flags & needsPostVisit) {
|
|
r = tree_ascend(t);
|
|
tree_pop(t);
|
|
t->flags &= ~hasLstat;
|
|
t->flags &= ~hasStat;
|
|
t->visit_type = r != 0 ? r : TREE_POSTASCENT;
|
|
return (t->visit_type);
|
|
}
|
|
}
|
|
return (t->visit_type = 0);
|
|
}
|
|
|
|
/*
|
|
* Return error code.
|
|
*/
|
|
int
|
|
tree_errno(struct tree *t)
|
|
{
|
|
return (t->tree_errno);
|
|
}
|
|
|
|
/*
|
|
* Called by the client to mark the directory just returned from
|
|
* tree_next() as needing to be visited.
|
|
*/
|
|
void
|
|
tree_descend(struct tree *t)
|
|
{
|
|
if (t->visit_type != TREE_REGULAR)
|
|
return;
|
|
|
|
if (tree_current_is_physical_dir(t)) {
|
|
tree_push(t, t->basename);
|
|
t->stack->flags |= isDir;
|
|
} else if (tree_current_is_dir(t)) {
|
|
tree_push(t, t->basename);
|
|
t->stack->flags |= isDirLink;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Get the stat() data for the entry just returned from tree_next().
|
|
*/
|
|
const struct stat *
|
|
tree_current_stat(struct tree *t)
|
|
{
|
|
if (!(t->flags & hasStat)) {
|
|
if (stat(t->basename, &t->st) != 0)
|
|
return NULL;
|
|
t->flags |= hasStat;
|
|
}
|
|
return (&t->st);
|
|
}
|
|
|
|
/*
|
|
* Get the lstat() data for the entry just returned from tree_next().
|
|
*/
|
|
const struct stat *
|
|
tree_current_lstat(struct tree *t)
|
|
{
|
|
if (!(t->flags & hasLstat)) {
|
|
if (lstat(t->basename, &t->lst) != 0)
|
|
return NULL;
|
|
t->flags |= hasLstat;
|
|
}
|
|
return (&t->lst);
|
|
}
|
|
|
|
/*
|
|
* Test whether current entry is a dir or link to a dir.
|
|
*/
|
|
int
|
|
tree_current_is_dir(struct tree *t)
|
|
{
|
|
const struct stat *st;
|
|
|
|
/*
|
|
* If we already have lstat() info, then try some
|
|
* cheap tests to determine if this is a dir.
|
|
*/
|
|
if (t->flags & hasLstat) {
|
|
/* If lstat() says it's a dir, it must be a dir. */
|
|
if (S_ISDIR(tree_current_lstat(t)->st_mode))
|
|
return 1;
|
|
/* Not a dir; might be a link to a dir. */
|
|
/* If it's not a link, then it's not a link to a dir. */
|
|
if (!S_ISLNK(tree_current_lstat(t)->st_mode))
|
|
return 0;
|
|
/*
|
|
* It's a link, but we don't know what it's a link to,
|
|
* so we'll have to use stat().
|
|
*/
|
|
}
|
|
|
|
st = tree_current_stat(t);
|
|
/* If we can't stat it, it's not a dir. */
|
|
if (st == NULL)
|
|
return 0;
|
|
/* Use the definitive test. Hopefully this is cached. */
|
|
return (S_ISDIR(st->st_mode));
|
|
}
|
|
|
|
/*
|
|
* Test whether current entry is a physical directory. Usually, we
|
|
* already have at least one of stat() or lstat() in memory, so we
|
|
* use tricks to try to avoid an extra trip to the disk.
|
|
*/
|
|
int
|
|
tree_current_is_physical_dir(struct tree *t)
|
|
{
|
|
const struct stat *st;
|
|
|
|
/*
|
|
* If stat() says it isn't a dir, then it's not a dir.
|
|
* If stat() data is cached, this check is free, so do it first.
|
|
*/
|
|
if ((t->flags & hasStat)
|
|
&& (!S_ISDIR(tree_current_stat(t)->st_mode)))
|
|
return 0;
|
|
|
|
/*
|
|
* Either stat() said it was a dir (in which case, we have
|
|
* to determine whether it's really a link to a dir) or
|
|
* stat() info wasn't available. So we use lstat(), which
|
|
* hopefully is already cached.
|
|
*/
|
|
|
|
st = tree_current_lstat(t);
|
|
/* If we can't stat it, it's not a dir. */
|
|
if (st == NULL)
|
|
return 0;
|
|
/* Use the definitive test. Hopefully this is cached. */
|
|
return (S_ISDIR(st->st_mode));
|
|
}
|
|
|
|
/*
 * Is the current entry a symbolic link?
 *
 * Returns non-zero if lstat() reports a link, 0 otherwise (including
 * when lstat() fails).
 */
int
tree_current_is_physical_link(struct tree *t)
{
	const struct stat *sb;

	sb = tree_current_lstat(t);
	return (sb == NULL ? 0 : S_ISLNK(sb->st_mode));
}
|
|
|
|
/*
|
|
* Return the access path for the entry just returned from tree_next().
|
|
*/
|
|
const char *
|
|
tree_current_access_path(struct tree *t)
|
|
{
|
|
return (t->basename);
|
|
}
|
|
|
|
/*
|
|
* Return the full path for the entry just returned from tree_next().
|
|
*/
|
|
const char *
|
|
tree_current_path(struct tree *t)
|
|
{
|
|
return (t->buff);
|
|
}
|
|
|
|
/*
|
|
* Return the length of the path for the entry just returned from tree_next().
|
|
*/
|
|
size_t
|
|
tree_current_pathlen(struct tree *t)
|
|
{
|
|
return (t->path_length);
|
|
}
|
|
|
|
/*
|
|
* Return the nesting depth of the entry just returned from tree_next().
|
|
*/
|
|
int
|
|
tree_current_depth(struct tree *t)
|
|
{
|
|
return (t->depth);
|
|
}
|
|
|
|
/*
|
|
* Terminate the traversal and release any resources.
|
|
*/
|
|
void
|
|
tree_close(struct tree *t)
|
|
{
|
|
/* Release anything remaining in the stack. */
|
|
while (t->stack != NULL)
|
|
tree_pop(t);
|
|
if (t->buff)
|
|
free(t->buff);
|
|
/* chdir() back to where we started. */
|
|
#ifdef HAVE_FCHDIR
|
|
if (t->initialDirFd >= 0) {
|
|
fchdir(t->initialDirFd);
|
|
close(t->initialDirFd);
|
|
t->initialDirFd = -1;
|
|
}
|
|
#elif defined(_WIN32) && !defined(__CYGWIN__)
|
|
if (t->initialDir != NULL) {
|
|
chdir(t->initialDir);
|
|
free(t->initialDir);
|
|
t->initialDir = NULL;
|
|
}
|
|
#endif
|
|
free(t);
|
|
}
|