tmpfs: Replace directory entry linked list with RB-Tree.

Use file name hash as a tree key, handle duplicate keys.  Both VOP_LOOKUP
and VOP_READDIR operations utilize same tree for search.  Directory
entry offset (cookie) is either file name hash or incremental id in case
of hash collisions (duplicate-cookies).  Keep sorted per directory list
of duplicate-cookie entries to facilitate cookie number allocation.

Don't fail if previous VOP_READDIR() offset is no longer valid, start
with next dirent instead.  Other file system handle it similarly.

Workaround race prone tn_readdir_last[pn] fields update.

Add tmpfs_dir_destroy() to free all dirents.

Set NFS cookies in tmpfs_dir_getdents(). Return EJUSTRETURN from
tmpfs_dir_getdents() instead of hard coded -1.

Mark directory traversal routines static as they are no longer
used outside of tmpfs_subr.c
This commit is contained in:
Gleb Kurtsou 2013-01-06 22:15:44 +00:00
parent 1f09eec3f8
commit 4fd5efe79e
4 changed files with 536 additions and 310 deletions

View File

@ -49,6 +49,7 @@
/* --------------------------------------------------------------------- */
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <vm/swap_pager.h>
@ -60,104 +61,81 @@ MALLOC_DECLARE(M_TMPFSNAME);
/*
* Internal representation of a tmpfs directory entry.
*/
LIST_HEAD(tmpfs_dir_duphead, tmpfs_dirent);
struct tmpfs_dirent {
TAILQ_ENTRY(tmpfs_dirent) td_entries;
/*
* Depending on td_cookie flag entry can be of 3 types:
* - regular -- no hash collisions, stored in RB-Tree
* - duphead -- synthetic linked list head for dup entries
* - dup -- stored in linked list instead of RB-Tree
*/
union {
/* regular and duphead entry types */
RB_ENTRY(tmpfs_dirent) td_entries;
/* Length of the name stored in this directory entry. This avoids
* the need to recalculate it every time the name is used. */
uint16_t td_namelen;
/* dup entry type */
struct {
LIST_ENTRY(tmpfs_dirent) entries;
LIST_ENTRY(tmpfs_dirent) index_entries;
} td_dup;
} uh;
/* The name of the entry, allocated from a string pool. This
* string is not required to be zero-terminated; therefore, the
* td_namelen field must always be used when accessing its value. */
char * td_name;
uint32_t td_cookie;
uint32_t td_hash;
u_int td_namelen;
/* Pointer to the node this entry refers to. In case this field
* is NULL, the node is a whiteout. */
struct tmpfs_node * td_node;
union {
/*
* The name of the entry, allocated from a string pool. This
* string is not required to be zero-terminated.
*/
char * td_name; /* regular, dup */
struct tmpfs_dir_duphead td_duphead; /* duphead */
} ud;
};
/* A directory in tmpfs holds a sorted list of directory entries, which in
/* A directory in tmpfs holds a list of directory entries, which in
* turn point to other files (which can be directories themselves).
*
* In tmpfs, this list is managed by a tail queue, whose head is defined by
* In tmpfs, this list is managed by a RB-Tree, whose head is defined by
* the struct tmpfs_dir type.
*
* It is imporant to notice that directories do not have entries for . and
* It is important to notice that directories do not have entries for . and
* .. as other file systems do. These can be generated when requested
* based on information available by other means, such as the pointer to
* the node itself in the former case or the pointer to the parent directory
* in the latter case. This is done to simplify tmpfs's code and, more
* importantly, to remove redundancy. */
TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
RB_HEAD(tmpfs_dir, tmpfs_dirent);
/* Each entry in a directory has a cookie that identifies it. Cookies
* supersede offsets within directories because, given how tmpfs stores
* directories in memory, there is no such thing as an offset. (Emulating
* a real offset could be very difficult.)
* directories in memory, there is no such thing as an offset.
*
* The '.', '..' and the end of directory markers have fixed cookies which
* cannot collide with the cookies generated by other entries. The cookies
* fot the other entries are generated based on the memory address on which
* stores their information is stored.
* for the other entries are generated based on the file name hash value or
* unique number in case of name hash collision.
*
* Ideally, using the entry's memory pointer as the cookie would be enough
* to represent it and it wouldn't cause collisions in any system.
* Unfortunately, this results in "offsets" with very large values which
* later raise problems in the Linux compatibility layer (and maybe in other
* places) as described in PR kern/32034. Hence we need to workaround this
* with a rather ugly hack.
*
* Linux 32-bit binaries, unless built with _FILE_OFFSET_BITS=64, have off_t
* set to 'long', which is a 32-bit *signed* long integer. Regardless of
* the macro value, GLIBC (2.3 at least) always uses the getdents64
* system call (when calling readdir) which internally returns off64_t
* offsets. In order to make 32-bit binaries work, *GLIBC* converts the
* 64-bit values returned by the kernel to 32-bit ones and aborts with
* EOVERFLOW if the conversion results in values that won't fit in 32-bit
* integers (which it assumes is because the directory is extremely large).
* This wouldn't cause problems if we were dealing with unsigned integers,
* but as we have signed integers, this check fails due to sign expansion.
*
* For example, consider that the kernel returns the 0xc1234567 cookie to
* userspace in a off64_t integer. Later on, GLIBC casts this value to
* off_t (remember, signed) with code similar to:
* system call returns the offset in kernel_value;
* off_t casted_value = kernel_value;
* if (sizeof(off_t) != sizeof(off64_t) &&
* kernel_value != casted_value)
* error!
* In this case, casted_value still has 0xc1234567, but when it is compared
* for equality against kernel_value, it is promoted to a 64-bit integer and
* becomes 0xffffffffc1234567, which is different than 0x00000000c1234567.
* Then, GLIBC assumes this is because the directory is very large.
*
* Given that all the above happens in user-space, we have no control over
* it; therefore we must workaround the issue here. We do this by
* truncating the pointer value to a 32-bit integer and hope that there
* won't be collisions. In fact, this will not cause any problems in
* 32-bit platforms but some might arise in 64-bit machines (I'm not sure
* if they can happen at all in practice).
*
* XXX A nicer solution shall be attempted. */
#ifdef _KERNEL
* To preserve compatibility cookies are limited to 31 bits.
*/
#define TMPFS_DIRCOOKIE_DOT 0
#define TMPFS_DIRCOOKIE_DOTDOT 1
#define TMPFS_DIRCOOKIE_EOF 2
static __inline
off_t
tmpfs_dircookie(struct tmpfs_dirent *de)
{
off_t cookie;
cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF;
MPASS(cookie != TMPFS_DIRCOOKIE_DOT);
MPASS(cookie != TMPFS_DIRCOOKIE_DOTDOT);
MPASS(cookie != TMPFS_DIRCOOKIE_EOF);
return cookie;
}
#endif
#define TMPFS_DIRCOOKIE_MASK ((off_t)0x3fffffffU)
#define TMPFS_DIRCOOKIE_MIN ((off_t)0x00000004U)
#define TMPFS_DIRCOOKIE_DUP ((off_t)0x40000000U)
#define TMPFS_DIRCOOKIE_DUPHEAD ((off_t)0x80000000U)
#define TMPFS_DIRCOOKIE_DUP_MIN TMPFS_DIRCOOKIE_DUP
#define TMPFS_DIRCOOKIE_DUP_MAX \
(TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK)
/* --------------------------------------------------------------------- */
@ -243,29 +221,31 @@ struct tmpfs_node {
dev_t tn_rdev;
/* Valid when tn_type == VDIR. */
struct tn_dir{
struct tn_dir {
/* Pointer to the parent directory. The root
* directory has a pointer to itself in this field;
* this property identifies the root node. */
struct tmpfs_node * tn_parent;
/* Head of a tail-queue that links the contents of
* the directory together. See above for a
* description of its contents. */
/* Head of a tree that links the contents of
* the directory together. */
struct tmpfs_dir tn_dirhead;
/* Head of a list the contains fake directory entries
* heads, i.e. entries with TMPFS_DIRCOOKIE_DUPHEAD
* flag. */
struct tmpfs_dir_duphead tn_dupindex;
/* Number and pointer of the first directory entry
* returned by the readdir operation if it were
* called again to continue reading data from the
* same directory as before. This is used to speed
* up reads of long directories, assuming that no
* more than one read is in progress at a given time.
* Otherwise, these values are discarded and a linear
* scan is performed from the beginning up to the
* point where readdir starts returning values. */
* Otherwise, these values are discarded. */
off_t tn_readdir_lastn;
struct tmpfs_dirent * tn_readdir_lastp;
}tn_dir;
} tn_dir;
/* Valid when tn_type == VLNK. */
/* The link's target, allocated from a string pool. */
@ -419,9 +399,9 @@ int tmpfs_alloc_node(struct tmpfs_mount *, enum vtype,
char *, dev_t, struct tmpfs_node **);
void tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
int tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
const char *, uint16_t, struct tmpfs_dirent **);
void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
boolean_t);
const char *, u_int, struct tmpfs_dirent **);
void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *);
void tmpfs_dirent_init(struct tmpfs_dirent *, const char *, u_int);
int tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int,
struct vnode **);
void tmpfs_free_vp(struct vnode *);
@ -429,13 +409,12 @@ int tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *,
struct componentname *, char *);
void tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);
void tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);
void tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *);
struct tmpfs_dirent * tmpfs_dir_lookup(struct tmpfs_node *node,
struct tmpfs_node *f,
struct componentname *cnp);
int tmpfs_dir_getdotdent(struct tmpfs_node *, struct uio *);
int tmpfs_dir_getdotdotdent(struct tmpfs_node *, struct uio *);
struct tmpfs_dirent * tmpfs_dir_lookupbycookie(struct tmpfs_node *, off_t);
int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *);
int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, int,
u_long *, int *);
int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
int tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
@ -467,8 +446,8 @@ int tmpfs_truncate(struct vnode *, off_t);
* with a length of 'len'.
*/
#define TMPFS_DIRENT_MATCHES(de, name, len) \
(de->td_namelen == (uint16_t)len && \
bcmp((de)->td_name, (name), (de)->td_namelen) == 0)
(de->td_namelen == len && \
bcmp((de)->ud.td_name, (name), (de)->td_namelen) == 0)
/* --------------------------------------------------------------------- */
@ -476,11 +455,10 @@ int tmpfs_truncate(struct vnode *, off_t);
* Ensures that the node pointed by 'node' is a directory and that its
* contents are consistent with respect to directories.
*/
#define TMPFS_VALIDATE_DIR(node) \
#define TMPFS_VALIDATE_DIR(node) do { \
MPASS((node)->tn_type == VDIR); \
MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
MPASS((node)->tn_dir.tn_readdir_lastp == NULL || \
tmpfs_dircookie((node)->tn_dir.tn_readdir_lastp) == (node)->tn_dir.tn_readdir_lastn);
} while (0)
/* --------------------------------------------------------------------- */

View File

@ -37,6 +37,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/fnv_hash.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
@ -58,6 +59,11 @@ __FBSDID("$FreeBSD$");
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
struct tmpfs_dir_cursor {
struct tmpfs_dirent *tdc_current;
struct tmpfs_dirent *tdc_tree;
};
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");
static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
@ -87,6 +93,10 @@ SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_RW,
&tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L",
"Amount of available memory and swap below which tmpfs growth stops");
static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
struct tmpfs_dirent *b);
RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
size_t
tmpfs_mem_avail(void)
{
@ -188,7 +198,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
break;
case VDIR:
TAILQ_INIT(&nnode->tn_dir.tn_dirhead);
RB_INIT(&nnode->tn_dir.tn_dirhead);
LIST_INIT(&nnode->tn_dir.tn_dupindex);
MPASS(parent != nnode);
MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
@ -309,6 +320,49 @@ tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
/* --------------------------------------------------------------------- */
static __inline uint32_t
tmpfs_dirent_hash(const char *name, u_int len)
{
uint32_t hash;
hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
hash &= 0xf;
#endif
if (hash < TMPFS_DIRCOOKIE_MIN)
hash += TMPFS_DIRCOOKIE_MIN;
return (hash);
}
static __inline off_t
tmpfs_dirent_cookie(struct tmpfs_dirent *de)
{
MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
return (de->td_cookie);
}
static __inline boolean_t
tmpfs_dirent_dup(struct tmpfs_dirent *de)
{
return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
}
static __inline boolean_t
tmpfs_dirent_duphead(struct tmpfs_dirent *de)
{
return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
}
void
tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
{
de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
memcpy(de->ud.td_name, name, namelen);
de->td_namelen = namelen;
}
/*
* Allocates a new directory entry for the node node with a name of name.
* The new directory entry is returned in *de.
@ -320,17 +374,17 @@ tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
*/
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
const char *name, uint16_t len, struct tmpfs_dirent **de)
const char *name, u_int len, struct tmpfs_dirent **de)
{
struct tmpfs_dirent *nde;
nde = (struct tmpfs_dirent *)uma_zalloc(
tmp->tm_dirent_pool, M_WAITOK);
nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
nde->td_namelen = len;
memcpy(nde->td_name, name, len);
nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK);
nde->td_node = node;
if (name != NULL) {
nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
tmpfs_dirent_init(nde, name, len);
} else
nde->td_namelen = 0;
if (node != NULL)
node->tn_links++;
@ -351,10 +405,8 @@ tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
* directory entry, as it may already have been released from the outside.
*/
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
boolean_t node_exists)
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
if (node_exists) {
struct tmpfs_node *node;
node = de->td_node;
@ -362,9 +414,8 @@ tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
MPASS(node->tn_links > 0);
node->tn_links--;
}
}
free(de->td_name, M_TMPFSNAME);
if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
free(de->ud.td_name, M_TMPFSNAME);
uma_zfree(tmp->tm_dirent_pool, de);
}
@ -586,7 +637,7 @@ tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
/* Allocate a vnode for the new file. */
error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
if (error != 0) {
tmpfs_free_dirent(tmp, de, TRUE);
tmpfs_free_dirent(tmp, de);
tmpfs_free_node(tmp, node);
goto out;
}
@ -605,6 +656,215 @@ out:
/* --------------------------------------------------------------------- */
static struct tmpfs_dirent *
tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
struct tmpfs_dirent *de;
de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
dc->tdc_tree = de;
if (de != NULL && tmpfs_dirent_duphead(de))
de = LIST_FIRST(&de->ud.td_duphead);
dc->tdc_current = de;
return (dc->tdc_current);
}
static struct tmpfs_dirent *
tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
struct tmpfs_dirent *de;
MPASS(dc->tdc_tree != NULL);
if (tmpfs_dirent_dup(dc->tdc_current)) {
dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
if (dc->tdc_current != NULL)
return (dc->tdc_current);
}
dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
&dnode->tn_dir.tn_dirhead, dc->tdc_tree);
if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
MPASS(dc->tdc_current != NULL);
}
return (dc->tdc_current);
}
/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
static struct tmpfs_dirent *
tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
{
struct tmpfs_dirent *de, dekey;
dekey.td_hash = hash;
de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
return (de);
}
/* Lookup directory entry by cookie, initialize directory cursor accordingly. */
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
struct tmpfs_dir_cursor *dc)
{
struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
struct tmpfs_dirent *de, dekey;
MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
if (cookie == node->tn_dir.tn_readdir_lastn &&
(de = node->tn_dir.tn_readdir_lastp) != NULL) {
/* Protect against possible race, tn_readdir_last[pn]
* may be updated with only shared vnode lock held. */
if (cookie == tmpfs_dirent_cookie(de))
goto out;
}
if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
uh.td_dup.index_entries) {
MPASS(tmpfs_dirent_dup(de));
if (de->td_cookie == cookie)
goto out;
/* dupindex list is sorted. */
if (de->td_cookie < cookie) {
de = NULL;
goto out;
}
}
MPASS(de == NULL);
goto out;
}
MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie);
dekey.td_hash = cookie;
/* Recover if direntry for cookie was removed */
de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
dc->tdc_tree = de;
dc->tdc_current = de;
if (de != NULL && tmpfs_dirent_duphead(de)) {
dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
MPASS(dc->tdc_current != NULL);
}
return (dc->tdc_current);
out:
dc->tdc_tree = de;
dc->tdc_current = de;
if (de != NULL && tmpfs_dirent_dup(de))
dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
de->td_hash);
return (dc->tdc_current);
}
/*
* Looks for a directory entry in the directory represented by node.
* 'cnp' describes the name of the entry to look for. Note that the .
* and .. components are not allowed as they do not physically exist
* within directories.
*
* Returns a pointer to the entry when found, otherwise NULL.
*/
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
struct componentname *cnp)
{
struct tmpfs_dir_duphead *duphead;
struct tmpfs_dirent *de;
uint32_t hash;
MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
cnp->cn_nameptr[1] == '.')));
TMPFS_VALIDATE_DIR(node);
hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
de = tmpfs_dir_xlookup_hash(node, hash);
if (de != NULL && tmpfs_dirent_duphead(de)) {
duphead = &de->ud.td_duphead;
LIST_FOREACH(de, duphead, uh.td_dup.entries) {
if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
cnp->cn_namelen))
break;
}
} else if (de != NULL) {
if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
cnp->cn_namelen))
de = NULL;
}
if (de != NULL && f != NULL && de->td_node != f)
de = NULL;
return (de);
}
/*
* Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
* list, allocate new cookie value.
*/
static void
tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
{
struct tmpfs_dir_duphead *dupindex;
struct tmpfs_dirent *de, *pde;
dupindex = &dnode->tn_dir.tn_dupindex;
de = LIST_FIRST(dupindex);
if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
if (de == NULL)
nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
else
nde->td_cookie = de->td_cookie + 1;
MPASS(tmpfs_dirent_dup(nde));
LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
return;
}
/*
* Cookie numbers are near exhaustion. Scan dupindex list for unused
* numbers. dupindex list is sorted in descending order. Keep it so
* after inserting nde.
*/
while (1) {
pde = de;
de = LIST_NEXT(de, uh.td_dup.index_entries);
if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
/*
* Last element of the index doesn't have minimal cookie
* value, use it.
*/
nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
return;
} else if (de == NULL) {
/*
* We are so lucky have 2^30 hash duplicates in single
* directory :) Return largest possible cookie value.
* It should be fine except possible issues with
* VOP_READDIR restart.
*/
nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
LIST_INSERT_HEAD(dupindex, nde,
uh.td_dup.index_entries);
LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
return;
}
if (de->td_cookie + 1 == pde->td_cookie ||
de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
continue; /* No hole or invalid cookie. */
nde->td_cookie = de->td_cookie + 1;
MPASS(tmpfs_dirent_dup(nde));
MPASS(pde->td_cookie > nde->td_cookie);
MPASS(nde->td_cookie > de->td_cookie);
LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
return;
};
}
/*
* Attaches the directory entry de to the directory represented by vp.
* Note that this does not change the link count of the node pointed by
@ -614,10 +874,38 @@ void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
struct tmpfs_node *dnode;
struct tmpfs_dirent *xde, *nde;
ASSERT_VOP_ELOCKED(vp, __func__);
MPASS(de->td_namelen > 0);
MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
MPASS(de->td_cookie == de->td_hash);
dnode = VP_TO_TMPFS_DIR(vp);
TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries);
dnode->tn_dir.tn_readdir_lastn = 0;
dnode->tn_dir.tn_readdir_lastp = NULL;
MPASS(!tmpfs_dirent_dup(de));
xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
if (xde != NULL && tmpfs_dirent_duphead(xde))
tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
else if (xde != NULL) {
/*
* Allocate new duphead. Swap xde with duphead to avoid
* adding/removing elements with the same hash.
*/
MPASS(!tmpfs_dirent_dup(xde));
tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
&nde);
/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
memcpy(nde, xde, sizeof(*xde));
xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
LIST_INIT(&xde->ud.td_duphead);
xde->td_namelen = 0;
xde->td_node = NULL;
tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
}
dnode->tn_size += sizeof(struct tmpfs_dirent);
dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
TMPFS_NODE_MODIFIED;
@ -633,58 +921,61 @@ tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
struct tmpfs_mount *tmp;
struct tmpfs_dir *head;
struct tmpfs_node *dnode;
struct tmpfs_dirent *xde;
ASSERT_VOP_ELOCKED(vp, __func__);
dnode = VP_TO_TMPFS_DIR(vp);
if (dnode->tn_dir.tn_readdir_lastp == de) {
dnode = VP_TO_TMPFS_DIR(vp);
head = &dnode->tn_dir.tn_dirhead;
dnode->tn_dir.tn_readdir_lastn = 0;
dnode->tn_dir.tn_readdir_lastp = NULL;
}
TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries);
if (tmpfs_dirent_dup(de)) {
/* Remove duphead if de was last entry. */
if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
MPASS(tmpfs_dirent_duphead(xde));
} else
xde = NULL;
LIST_REMOVE(de, uh.td_dup.entries);
LIST_REMOVE(de, uh.td_dup.index_entries);
if (xde != NULL) {
if (LIST_EMPTY(&xde->ud.td_duphead)) {
RB_REMOVE(tmpfs_dir, head, xde);
tmp = VFS_TO_TMPFS(vp->v_mount);
MPASS(xde->td_node == NULL);
tmpfs_free_dirent(tmp, xde);
}
}
} else
RB_REMOVE(tmpfs_dir, head, de);
dnode->tn_size -= sizeof(struct tmpfs_dirent);
dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
TMPFS_NODE_MODIFIED;
}
/* --------------------------------------------------------------------- */
/*
* Looks for a directory entry in the directory represented by node.
* 'cnp' describes the name of the entry to look for. Note that the .
* and .. components are not allowed as they do not physically exist
* within directories.
*
* Returns a pointer to the entry when found, otherwise NULL.
*/
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
struct componentname *cnp)
void
tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
{
boolean_t found;
struct tmpfs_dirent *de;
struct tmpfs_dirent *de, *dde, *nde;
MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
cnp->cn_nameptr[1] == '.')));
TMPFS_VALIDATE_DIR(node);
found = 0;
TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
if (f != NULL && de->td_node != f)
continue;
MPASS(cnp->cn_namelen < 0xffff);
if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
found = 1;
break;
RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
/* Node may already be destroyed. */
de->td_node = NULL;
if (tmpfs_dirent_duphead(de)) {
while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
LIST_REMOVE(dde, uh.td_dup.entries);
dde->td_node = NULL;
tmpfs_free_dirent(tmp, dde);
}
}
node->tn_status |= TMPFS_NODE_ACCESSED;
return found ? de : NULL;
tmpfs_free_dirent(tmp, de);
}
}
/* --------------------------------------------------------------------- */
@ -696,7 +987,7 @@ tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
* hold the directory entry or an appropriate error code if another
* error happens.
*/
int
static int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
int error;
@ -713,12 +1004,9 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
dent.d_reclen = GENERIC_DIRSIZ(&dent);
if (dent.d_reclen > uio->uio_resid)
error = -1;
else {
error = EJUSTRETURN;
else
error = uiomove(&dent, dent.d_reclen, uio);
if (error == 0)
uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
}
node->tn_status |= TMPFS_NODE_ACCESSED;
@ -734,7 +1022,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
* hold the directory entry or an appropriate error code if another
* error happens.
*/
int
static int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
int error;
@ -763,19 +1051,9 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
dent.d_reclen = GENERIC_DIRSIZ(&dent);
if (dent.d_reclen > uio->uio_resid)
error = -1;
else {
error = uiomove(&dent, dent.d_reclen, uio);
if (error == 0) {
struct tmpfs_dirent *de;
de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
if (de == NULL)
uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
error = EJUSTRETURN;
else
uio->uio_offset = tmpfs_dircookie(de);
}
}
error = uiomove(&dent, dent.d_reclen, uio);
node->tn_status |= TMPFS_NODE_ACCESSED;
@ -784,30 +1062,6 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
/* --------------------------------------------------------------------- */
/*
* Lookup a directory entry by its associated cookie.
*/
struct tmpfs_dirent *
tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
{
struct tmpfs_dirent *de;
if (cookie == node->tn_dir.tn_readdir_lastn &&
node->tn_dir.tn_readdir_lastp != NULL) {
return node->tn_dir.tn_readdir_lastp;
}
TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
if (tmpfs_dircookie(de) == cookie) {
break;
}
}
return de;
}
/* --------------------------------------------------------------------- */
/*
* Helper function for tmpfs_readdir. Returns as much directory entries
* as can fit in the uio space. The read starts at uio->uio_offset.
@ -816,27 +1070,47 @@ tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
* error code if another error happens.
*/
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int cnt,
u_long *cookies, int *ncookies)
{
int error;
off_t startcookie;
struct tmpfs_dir_cursor dc;
struct tmpfs_dirent *de;
off_t off;
int error;
TMPFS_VALIDATE_DIR(node);
/* Locate the first directory entry we have to return. We have cached
* the last readdir in the node, so use those values if appropriate.
* Otherwise do a linear scan to find the requested entry. */
startcookie = uio->uio_offset;
MPASS(startcookie != TMPFS_DIRCOOKIE_DOT);
MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
if (startcookie == TMPFS_DIRCOOKIE_EOF) {
return 0;
} else {
de = tmpfs_dir_lookupbycookie(node, startcookie);
}
if (de == NULL) {
return EINVAL;
off = 0;
switch (uio->uio_offset) {
case TMPFS_DIRCOOKIE_DOT:
error = tmpfs_dir_getdotdent(node, uio);
if (error != 0)
return (error);
uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
if (cnt != 0)
cookies[(*ncookies)++] = off = uio->uio_offset;
case TMPFS_DIRCOOKIE_DOTDOT:
error = tmpfs_dir_getdotdotdent(node, uio);
if (error != 0)
return (error);
de = tmpfs_dir_first(node, &dc);
if (de == NULL)
uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
else
uio->uio_offset = tmpfs_dirent_cookie(de);
if (cnt != 0)
cookies[(*ncookies)++] = off = uio->uio_offset;
if (de == NULL)
return (0);
break;
case TMPFS_DIRCOOKIE_EOF:
return (0);
default:
de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
if (de == NULL)
return (EINVAL);
if (cnt != 0)
off = tmpfs_dirent_cookie(de);
}
/* Read as much entries as possible; i.e., until we reach the end of
@ -887,14 +1161,14 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
}
d.d_namlen = de->td_namelen;
MPASS(de->td_namelen < sizeof(d.d_name));
(void)memcpy(d.d_name, de->td_name, de->td_namelen);
(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
d.d_name[de->td_namelen] = '\0';
d.d_reclen = GENERIC_DIRSIZ(&d);
/* Stop reading if the directory entry we are treating is
* bigger than the amount of data that can be returned. */
if (d.d_reclen > uio->uio_resid) {
error = -1;
error = EJUSTRETURN;
break;
}
@ -902,21 +1176,30 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
* advance pointers. */
error = uiomove(&d, d.d_reclen, uio);
if (error == 0) {
(*cntp)++;
de = TAILQ_NEXT(de, td_entries);
de = tmpfs_dir_next(node, &dc);
if (cnt != 0) {
if (de == NULL)
off = TMPFS_DIRCOOKIE_EOF;
else
off = tmpfs_dirent_cookie(de);
MPASS(*ncookies < cnt);
cookies[(*ncookies)++] = off;
}
}
} while (error == 0 && uio->uio_resid > 0 && de != NULL);
/* Update the offset and cache. */
if (de == NULL) {
uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
node->tn_dir.tn_readdir_lastn = 0;
node->tn_dir.tn_readdir_lastp = NULL;
} else {
node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de);
node->tn_dir.tn_readdir_lastp = de;
if (cnt == 0) {
if (de == NULL)
off = TMPFS_DIRCOOKIE_EOF;
else
off = tmpfs_dirent_cookie(de);
}
uio->uio_offset = off;
node->tn_dir.tn_readdir_lastn = off;
node->tn_dir.tn_readdir_lastp = de;
node->tn_status |= TMPFS_NODE_ACCESSED;
return error;
}
@ -943,7 +1226,7 @@ tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
MPASS(de != NULL && de->td_node == NULL);
tmpfs_dir_detach(dvp, de);
tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE);
tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
}
/* --------------------------------------------------------------------- */
@ -1436,3 +1719,15 @@ out:
return error;
}
static __inline int
tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
if (a->td_hash > b->td_hash)
return (1);
else if (a->td_hash < b->td_hash)
return (-1);
return (0);
}
RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);

View File

@ -294,19 +294,8 @@ tmpfs_unmount(struct mount *mp, int mntflags)
while (node != NULL) {
struct tmpfs_node *next;
if (node->tn_type == VDIR) {
struct tmpfs_dirent *de;
de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
while (de != NULL) {
struct tmpfs_dirent *nde;
nde = TAILQ_NEXT(de, td_entries);
tmpfs_free_dirent(tmp, de, FALSE);
de = nde;
node->tn_size -= sizeof(struct tmpfs_dirent);
}
}
if (node->tn_type == VDIR)
tmpfs_dir_destroy(tmp, node);
next = LIST_NEXT(node, tn_entries);
tmpfs_free_node(tmp, node);

View File

@ -847,7 +847,7 @@ tmpfs_remove(struct vop_remove_args *v)
/* Free the directory entry we just deleted. Note that the node
* referred by it will not be removed until the vnode is really
* reclaimed. */
tmpfs_free_dirent(tmp, de, TRUE);
tmpfs_free_dirent(tmp, de);
node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
error = 0;
@ -1263,26 +1263,25 @@ tmpfs_rename(struct vop_rename_args *v)
fdnode->tn_links--;
TMPFS_NODE_UNLOCK(fdnode);
}
}
/* Do the move: just remove the entry from the source directory
* and insert it into the target one. */
tmpfs_dir_detach(fdvp, de);
if (fcnp->cn_flags & DOWHITEOUT)
tmpfs_dir_whiteout_add(fdvp, fcnp);
if (tcnp->cn_flags & ISWHITEOUT)
tmpfs_dir_whiteout_remove(tdvp, tcnp);
tmpfs_dir_attach(tdvp, de);
}
/* If the name has changed, we need to make it effective by changing
* it in the directory entry. */
if (newname != NULL) {
MPASS(tcnp->cn_namelen <= MAXNAMLEN);
free(de->td_name, M_TMPFSNAME);
de->td_namelen = (uint16_t)tcnp->cn_namelen;
memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
de->td_name = newname;
free(de->ud.td_name, M_TMPFSNAME);
de->ud.td_name = newname;
tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
fnode->tn_status |= TMPFS_NODE_CHANGED;
tdnode->tn_status |= TMPFS_NODE_MODIFIED;
@ -1291,15 +1290,20 @@ tmpfs_rename(struct vop_rename_args *v)
/* If we are overwriting an entry, we have to remove the old one
* from the target directory. */
if (tvp != NULL) {
struct tmpfs_dirent *tde;
/* Remove the old entry from the target directory. */
de = tmpfs_dir_lookup(tdnode, tnode, tcnp);
tmpfs_dir_detach(tdvp, de);
tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
tmpfs_dir_detach(tdvp, tde);
/* Free the directory entry we just deleted. Note that the
* node referred by it will not be removed until the vnode is
* really reclaimed. */
tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
}
tmpfs_dir_attach(tdvp, de);
cache_purge(fvp);
if (tvp != NULL)
cache_purge(tvp);
@ -1427,7 +1431,7 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
/* Free the directory entry we just deleted. Note that the node
* referred by it will not be removed until the vnode is really
* reclaimed. */
tmpfs_free_dirent(tmp, de, TRUE);
tmpfs_free_dirent(tmp, de);
/* Release the deleted vnode (will destroy the node, notify
* interested parties and clean it from the cache). */
@ -1473,8 +1477,8 @@ tmpfs_readdir(struct vop_readdir_args *v)
int *ncookies = v->a_ncookies;
int error;
off_t startoff;
off_t cnt = 0;
ssize_t startresid;
int cnt = 0;
struct tmpfs_node *node;
/* This operation only makes sense on directory nodes. */
@ -1483,69 +1487,29 @@ tmpfs_readdir(struct vop_readdir_args *v)
node = VP_TO_TMPFS_DIR(vp);
startoff = uio->uio_offset;
startresid = uio->uio_resid;
if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
error = tmpfs_dir_getdotdent(node, uio);
if (error != 0)
goto outok;
cnt++;
if (cookies != NULL && ncookies != NULL) {
cnt = howmany(node->tn_size, sizeof(struct tmpfs_dirent)) + 2;
*cookies = malloc(cnt * sizeof(**cookies), M_TEMP, M_WAITOK);
*ncookies = 0;
}
if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
error = tmpfs_dir_getdotdotdent(node, uio);
if (error != 0)
goto outok;
cnt++;
}
if (cnt == 0)
error = tmpfs_dir_getdents(node, uio, 0, NULL, NULL);
else
error = tmpfs_dir_getdents(node, uio, cnt, *cookies, ncookies);
error = tmpfs_dir_getdents(node, uio, &cnt);
if (error == EJUSTRETURN)
error = (uio->uio_resid != startresid) ? 0 : EINVAL;
outok:
MPASS(error >= -1);
if (error == -1)
error = (cnt != 0) ? 0 : EINVAL;
if (error != 0 && cnt != 0)
free(*cookies, M_TEMP);
if (eofflag != NULL)
*eofflag =
(error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
/* Update NFS-related variables. */
if (error == 0 && cookies != NULL && ncookies != NULL) {
off_t i;
off_t off = startoff;
struct tmpfs_dirent *de = NULL;
*ncookies = cnt;
*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
for (i = 0; i < cnt; i++) {
MPASS(off != TMPFS_DIRCOOKIE_EOF);
if (off == TMPFS_DIRCOOKIE_DOT) {
off = TMPFS_DIRCOOKIE_DOTDOT;
} else {
if (off == TMPFS_DIRCOOKIE_DOTDOT) {
de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
} else if (de != NULL) {
de = TAILQ_NEXT(de, td_entries);
} else {
de = tmpfs_dir_lookupbycookie(node,
off);
MPASS(de != NULL);
de = TAILQ_NEXT(de, td_entries);
}
if (de == NULL)
off = TMPFS_DIRCOOKIE_EOF;
else
off = tmpfs_dircookie(de);
}
(*cookies)[i] = off;
}
MPASS(uio->uio_offset == off);
}
return error;
}