Add d_off support for multiple filesystems.
The d_off field has been added to the dirent structure recently. Currently filesystems don't support this feature. Support has been added and tested for zfs, ufs, ext2fs, fdescfs, msdosfs and unionfs. A stub implementation is available for cd9660, nandfs, udf and pseudofs but hasn't been tested. Motivation for this feature: our use case is a userspace NFS server (nfs-ganesha) with zfs. At the moment we cache direntry offsets by calling lseek once per entry; with this patch we can get the offset directly from getdirentries(2) calls, which provides a significant speedup. Submitted by: Jack Halford <jack@gandi.net> Reviewed by: mckusick, pfg, rmacklem (previous versions) Sponsored by: Gandi.net MFC after: 1 week Differential revision: https://reviews.freebsd.org/D17917
This commit is contained in:
parent
d5aef6d6ca
commit
1c4ca77890
@ -28,7 +28,7 @@
|
||||
.\" @(#)getdirentries.2 8.2 (Berkeley) 5/3/95
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd May 28, 2017
|
||||
.Dd November 14, 2018
|
||||
.Dt GETDIRENTRIES 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -88,6 +88,11 @@ Files that are linked by hard links (see
|
||||
have the same
|
||||
.Fa d_fileno .
|
||||
The
|
||||
.Fa d_off
|
||||
field returns a cookie which can be used with
|
||||
.Xr lseek 2
|
||||
to position the directory descriptor to the next entry.
|
||||
The
|
||||
.Fa d_reclen
|
||||
entry is the length, in bytes, of the directory record.
|
||||
The
|
||||
@ -140,8 +145,17 @@ a value returned in the location pointed to by
|
||||
.Fa basep
|
||||
.Po Fn getdirentries
|
||||
only
|
||||
.Pc
|
||||
.Pc ,
|
||||
a value returned in the
|
||||
.Fa d_off
|
||||
field,
|
||||
or zero.
|
||||
.Sh IMPLEMENTATION NOTES
|
||||
The
|
||||
.Fa d_off
|
||||
field is used by NFS servers as a readdir cookie.
|
||||
These cookies can be cached and allow directory entries to be read at a specific
|
||||
offset on demand.
|
||||
.Sh RETURN VALUES
|
||||
If successful, the number of bytes actually transferred is returned.
|
||||
Otherwise, -1 is returned and the global variable
|
||||
|
@ -28,7 +28,7 @@
|
||||
.\" @(#)dir.5 8.3 (Berkeley) 4/19/94
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd June 20, 2018
|
||||
.Dd November 14, 2018
|
||||
.Dt DIR 5
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -101,7 +101,7 @@ The directory entry format is defined in the file
|
||||
|
||||
struct dirent {
|
||||
ino_t d_fileno; /* file number of entry */
|
||||
off_t d_off; /* directory offset of entry */
|
||||
off_t d_off; /* directory offset of the next entry */
|
||||
__uint16_t d_reclen; /* length of this record */
|
||||
__uint8_t d_type; /* file type, see below */
|
||||
__uint8_t d_namlen; /* length of string in d_name */
|
||||
|
@ -1097,6 +1097,8 @@ zfsctl_snapdir_readdir(ap)
|
||||
strcpy(entry.d_name, snapname);
|
||||
entry.d_namlen = strlen(entry.d_name);
|
||||
entry.d_reclen = sizeof(entry);
|
||||
/* NOTE: d_off is the offset for the *next* entry. */
|
||||
entry.d_off = cookie + dots_offset;
|
||||
error = vfs_read_dirent(ap, &entry, uio->uio_offset);
|
||||
if (error != 0) {
|
||||
if (error == ENAMETOOLONG)
|
||||
|
@ -2529,8 +2529,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon
|
||||
*/
|
||||
eodp->ed_ino = objnum;
|
||||
eodp->ed_reclen = reclen;
|
||||
/* NOTE: ed_off is the offset for the *next* entry */
|
||||
next = &(eodp->ed_off);
|
||||
/* NOTE: ed_off is the offset for the *next* entry. */
|
||||
next = &eodp->ed_off;
|
||||
eodp->ed_eflags = zap.za_normalization_conflict ?
|
||||
ED_CASE_CONFLICT : 0;
|
||||
(void) strncpy(eodp->ed_name, zap.za_name,
|
||||
@ -2543,6 +2543,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon
|
||||
odp->d_ino = objnum;
|
||||
odp->d_reclen = reclen;
|
||||
odp->d_namlen = strlen(zap.za_name);
|
||||
/* NOTE: d_off is the offset for the *next* entry. */
|
||||
next = &odp->d_off;
|
||||
(void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
|
||||
odp->d_type = type;
|
||||
odp = (dirent64_t *)((intptr_t)odp + reclen);
|
||||
@ -2567,6 +2569,9 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
/* Fill the offset right after advancing the cursor. */
|
||||
if (next != NULL)
|
||||
*next = offset;
|
||||
if (cooks != NULL) {
|
||||
*cooks++ = offset;
|
||||
ncooks--;
|
||||
|
@ -576,6 +576,8 @@ cd9660_readdir(ap)
|
||||
entryoffsetinblock;
|
||||
|
||||
idp->curroff += reclen;
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
idp->current.d_off = idp->curroff;
|
||||
|
||||
switch (imp->iso_ftype) {
|
||||
case ISO_FTYPE_RRIP:
|
||||
|
@ -1381,6 +1381,8 @@ devfs_readdir(struct vop_readdir_args *ap)
|
||||
if (dp->d_reclen > uio->uio_resid)
|
||||
break;
|
||||
dp->d_fileno = de->de_inode;
|
||||
/* NOTE: d_off is the offset for the *next* entry. */
|
||||
dp->d_off = off + dp->d_reclen;
|
||||
if (off >= uio->uio_offset) {
|
||||
error = vfs_read_dirent(ap, dp, off);
|
||||
if (error)
|
||||
|
@ -224,6 +224,8 @@ ext2_readdir(struct vop_readdir_args *ap)
|
||||
dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
|
||||
bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen);
|
||||
dstdp.d_name[dstdp.d_namlen] = '\0';
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dstdp.d_off = offset + dp->e2d_reclen;
|
||||
if (dstdp.d_reclen > uio->uio_resid) {
|
||||
if (uio->uio_resid == startresid)
|
||||
error = EINVAL;
|
||||
|
@ -574,6 +574,8 @@ fdesc_readdir(struct vop_readdir_args *ap)
|
||||
dp->d_fileno = i + FD_DESC;
|
||||
break;
|
||||
}
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dp->d_off = UIO_MX * (i + 1);
|
||||
if (dp->d_namlen != 0) {
|
||||
/*
|
||||
* And ship to userland
|
||||
|
@ -1558,6 +1558,8 @@ msdosfs_readdir(struct vop_readdir_args *ap)
|
||||
break;
|
||||
}
|
||||
dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dirbuf.d_off = offset + sizeof(struct direntry);
|
||||
if (uio->uio_resid < dirbuf.d_reclen)
|
||||
goto out;
|
||||
error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
|
||||
@ -1681,6 +1683,8 @@ msdosfs_readdir(struct vop_readdir_args *ap)
|
||||
mbnambuf_flush(&nb, &dirbuf);
|
||||
chksum = -1;
|
||||
dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dirbuf.d_off = offset + sizeof(struct direntry);
|
||||
if (uio->uio_resid < dirbuf.d_reclen) {
|
||||
brelse(bp);
|
||||
goto out;
|
||||
|
@ -1233,6 +1233,8 @@ nandfs_readdir(struct vop_readdir_args *ap)
|
||||
dirent.d_namlen = name_len;
|
||||
strncpy(dirent.d_name, ndirent->name, name_len);
|
||||
dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dirent.d_off = diroffset + ndirent->rec_len;
|
||||
DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
|
||||
name_len, dirent.d_name));
|
||||
}
|
||||
|
@ -830,6 +830,8 @@ pfs_readdir(struct vop_readdir_args *va)
|
||||
pfsent->entry.d_name[i] = pn->pn_name[i];
|
||||
pfsent->entry.d_name[i] = 0;
|
||||
pfsent->entry.d_namlen = i;
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
pfsent->entry.d_off = offset + PFS_DELEN;
|
||||
switch (pn->pn_type) {
|
||||
case pfstype_procdir:
|
||||
KASSERT(p != NULL,
|
||||
|
@ -846,6 +846,7 @@ udf_readdir(struct vop_readdir_args *a)
|
||||
dir.d_name[1] = '\0';
|
||||
dir.d_namlen = 1;
|
||||
dir.d_reclen = GENERIC_DIRSIZ(&dir);
|
||||
dir.d_off = 1;
|
||||
uiodir.dirent = &dir;
|
||||
error = udf_uiodir(&uiodir, dir.d_reclen, uio, 1);
|
||||
if (error)
|
||||
@ -858,6 +859,7 @@ udf_readdir(struct vop_readdir_args *a)
|
||||
dir.d_name[2] = '\0';
|
||||
dir.d_namlen = 2;
|
||||
dir.d_reclen = GENERIC_DIRSIZ(&dir);
|
||||
dir.d_off = 2;
|
||||
uiodir.dirent = &dir;
|
||||
error = udf_uiodir(&uiodir, dir.d_reclen, uio, 2);
|
||||
} else {
|
||||
@ -867,6 +869,7 @@ udf_readdir(struct vop_readdir_args *a)
|
||||
dir.d_type = (fid->file_char & UDF_FILE_CHAR_DIR) ?
|
||||
DT_DIR : DT_UNKNOWN;
|
||||
dir.d_reclen = GENERIC_DIRSIZ(&dir);
|
||||
dir.d_off = ds->this_off;
|
||||
uiodir.dirent = &dir;
|
||||
error = udf_uiodir(&uiodir, dir.d_reclen, uio,
|
||||
ds->this_off);
|
||||
|
@ -2218,6 +2218,8 @@ ufs_readdir(ap)
|
||||
dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
|
||||
bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
|
||||
dstdp.d_name[dstdp.d_namlen] = '\0';
|
||||
/* NOTE: d_off is the offset of the *next* entry. */
|
||||
dstdp.d_off = offset + dp->d_reclen;
|
||||
if (dstdp.d_reclen > uio->uio_resid) {
|
||||
if (uio->uio_resid == startresid)
|
||||
error = EINVAL;
|
||||
|
Loading…
Reference in New Issue
Block a user