vfs: add VOP_STAT
The current scheme of calling VOP_GETATTR adds avoidable overhead. An example with tmpfs doing fstat (ops/s): before: 7488958, after: 7913833. Reviewed by: kib (previous version). Differential Revision: https://reviews.freebsd.org/D25910
This commit is contained in:
parent
1e5d733503
commit
51ea7bea91
@ -2308,7 +2308,8 @@ MLINKS+=vm_page_insert.9 vm_page_remove.9
|
||||
MLINKS+=vm_page_wire.9 vm_page_unwire.9
|
||||
MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
|
||||
MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \
|
||||
VOP_ATTRIB.9 VOP_SETATTR.9
|
||||
VOP_ATTRIB.9 VOP_SETATTR.9 \
|
||||
VOP_ATTRIB.9 VOP_STAT.9
|
||||
MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
|
||||
VOP_CREATE.9 VOP_MKNOD.9 \
|
||||
VOP_CREATE.9 VOP_SYMLINK.9
|
||||
|
@ -28,7 +28,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd August 29, 2008
|
||||
.Dd August 8, 2020
|
||||
.Dt VOP_ATTRIB 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -42,19 +42,49 @@
|
||||
.Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
|
||||
.Ft int
|
||||
.Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
|
||||
.Ft int
|
||||
.Fn VOP_STAT "struct vnode *vp" "struct stat *sb" "struct ucred *active_cred" \
|
||||
"struct ucred *file_cred" "struct thread *td"
|
||||
.Sh DESCRIPTION
|
||||
These entry points manipulate various attributes of a file or directory,
|
||||
including file permissions, owner, group, size,
|
||||
access time and modification time.
|
||||
.Pp
|
||||
The arguments are:
|
||||
.Fn VOP_STAT
|
||||
returns data in a format suitable for the
|
||||
.Xr stat 2
|
||||
system call and by default is implemented as a wrapper around
|
||||
.Fn VOP_GETATTR .
|
||||
Filesystems may want to implement their own variant for performance reasons.
|
||||
.Pp
|
||||
For
|
||||
.Fn VOP_GETATTR
|
||||
and
|
||||
.Fn VOP_SETATTR
|
||||
the arguments are:
|
||||
.Bl -tag -width cred
|
||||
.It Fa vp
|
||||
The vnode of the file.
|
||||
.It Fa vap
|
||||
The attributes of the file.
|
||||
.It Fa cred
|
||||
The user credentials of the calling process.
|
||||
The user credentials of the calling thread.
|
||||
.El
|
||||
.Pp
|
||||
For
|
||||
.Fn VOP_STAT
|
||||
the arguments are:
|
||||
.Bl -tag -width active_cred
|
||||
.It Fa vp
|
||||
The vnode of the file.
|
||||
.It Fa sb
|
||||
The structure that receives the attributes of the file.
|
||||
.It Fa active_cred
|
||||
The user credentials of the calling thread.
|
||||
.It Fa file_cred
|
||||
The credentials installed on the file description pointing to the vnode, or NOCRED when no file description is involved.
|
||||
.It Fa td
|
||||
The calling thread.
|
||||
.El
|
||||
.Pp
|
||||
Attributes which are not being modified by
|
||||
@ -67,8 +97,11 @@ the contents of
|
||||
.Fa *vap
|
||||
prior to setting specific values.
|
||||
.Sh LOCKS
|
||||
Both
|
||||
.Fn VOP_GETATTR
|
||||
expects the vnode to be locked on entry and will leave the vnode locked on
|
||||
and
|
||||
.Fn VOP_STAT
|
||||
expect the vnode to be locked on entry and will leave the vnode locked on
|
||||
return.
|
||||
The lock type can be either shared or exclusive.
|
||||
.Pp
|
||||
@ -84,6 +117,10 @@ otherwise an appropriate error is returned.
|
||||
.Fn VOP_SETATTR
|
||||
returns zero if the attributes were changed successfully, otherwise an
|
||||
appropriate error is returned.
|
||||
.Fn VOP_STAT
|
||||
returns 0 if it was able to retrieve the attribute data via
|
||||
.Fa *sb ,
|
||||
otherwise an appropriate error is returned.
|
||||
.Sh ERRORS
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EPERM
|
||||
|
@ -1691,7 +1691,7 @@ linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
|
||||
vp = filp->f_vnode;
|
||||
|
||||
vn_lock(vp, LK_SHARED | LK_RETRY);
|
||||
error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
|
||||
error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
|
||||
VOP_UNLOCK(vp);
|
||||
|
||||
return (error);
|
||||
|
@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/dirent.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/stat.h>
|
||||
#include <security/audit/audit.h>
|
||||
#include <sys/priv.h>
|
||||
|
||||
#include <security/mac/mac_framework.h>
|
||||
|
||||
@ -87,6 +90,7 @@ static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
|
||||
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
|
||||
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
|
||||
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
|
||||
static int vop_stdstat(struct vop_stat_args *ap);
|
||||
|
||||
/*
|
||||
* This vnode table stores what we want to do if the filesystem doesn't
|
||||
@ -114,6 +118,7 @@ struct vop_vector default_vnodeops = {
|
||||
.vop_bmap = vop_stdbmap,
|
||||
.vop_close = VOP_NULL,
|
||||
.vop_fsync = VOP_NULL,
|
||||
.vop_stat = vop_stdstat,
|
||||
.vop_fdatasync = vop_stdfdatasync,
|
||||
.vop_getpages = vop_stdgetpages,
|
||||
.vop_getpages_async = vop_stdgetpages_async,
|
||||
@ -1461,3 +1466,111 @@ vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
|
||||
sigallowstop(prev_stops);
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static int
|
||||
vop_stdstat(struct vop_stat_args *a)
|
||||
{
|
||||
struct vattr vattr;
|
||||
struct vattr *vap;
|
||||
struct vnode *vp;
|
||||
struct stat *sb;
|
||||
int error;
|
||||
u_short mode;
|
||||
|
||||
vp = a->a_vp;
|
||||
sb = a->a_sb;
|
||||
|
||||
error = vop_stat_helper_pre(a);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
vap = &vattr;
|
||||
|
||||
/*
|
||||
* Initialize defaults for new and unusual fields, so that file
|
||||
* systems which don't support these fields don't need to know
|
||||
* about them.
|
||||
*/
|
||||
vap->va_birthtime.tv_sec = -1;
|
||||
vap->va_birthtime.tv_nsec = 0;
|
||||
vap->va_fsid = VNOVAL;
|
||||
vap->va_rdev = NODEV;
|
||||
|
||||
error = VOP_GETATTR(vp, vap, a->a_active_cred);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Zero the spare stat fields
|
||||
*/
|
||||
bzero(sb, sizeof *sb);
|
||||
|
||||
/*
|
||||
* Copy from vattr table
|
||||
*/
|
||||
if (vap->va_fsid != VNOVAL)
|
||||
sb->st_dev = vap->va_fsid;
|
||||
else
|
||||
sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
|
||||
sb->st_ino = vap->va_fileid;
|
||||
mode = vap->va_mode;
|
||||
switch (vap->va_type) {
|
||||
case VREG:
|
||||
mode |= S_IFREG;
|
||||
break;
|
||||
case VDIR:
|
||||
mode |= S_IFDIR;
|
||||
break;
|
||||
case VBLK:
|
||||
mode |= S_IFBLK;
|
||||
break;
|
||||
case VCHR:
|
||||
mode |= S_IFCHR;
|
||||
break;
|
||||
case VLNK:
|
||||
mode |= S_IFLNK;
|
||||
break;
|
||||
case VSOCK:
|
||||
mode |= S_IFSOCK;
|
||||
break;
|
||||
case VFIFO:
|
||||
mode |= S_IFIFO;
|
||||
break;
|
||||
default:
|
||||
error = EBADF;
|
||||
goto out;
|
||||
}
|
||||
sb->st_mode = mode;
|
||||
sb->st_nlink = vap->va_nlink;
|
||||
sb->st_uid = vap->va_uid;
|
||||
sb->st_gid = vap->va_gid;
|
||||
sb->st_rdev = vap->va_rdev;
|
||||
if (vap->va_size > OFF_MAX) {
|
||||
error = EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
sb->st_size = vap->va_size;
|
||||
sb->st_atim.tv_sec = vap->va_atime.tv_sec;
|
||||
sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
|
||||
sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
|
||||
sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
|
||||
sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
|
||||
sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
|
||||
sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
|
||||
sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
|
||||
|
||||
/*
|
||||
* According to www.opengroup.org, the meaning of st_blksize is
|
||||
* "a filesystem-specific preferred I/O block size for this
|
||||
* object. In some filesystem types, this may vary from file
|
||||
* to file"
|
||||
* Use minimum/default of PAGE_SIZE (e.g. for VCHR).
|
||||
*/
|
||||
|
||||
sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
|
||||
sb->st_flags = vap->va_flags;
|
||||
sb->st_blocks = vap->va_bytes / S_BLKSIZE;
|
||||
sb->st_gen = vap->va_gen;
|
||||
out:
|
||||
return (vop_stat_helper_post(a, error));
|
||||
}
|
||||
|
@ -1867,7 +1867,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
|
||||
if (vp->v_type == VDIR && oldinum == 0) {
|
||||
error = EPERM; /* POSIX */
|
||||
} else if (oldinum != 0 &&
|
||||
((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
|
||||
((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
|
||||
sb.st_ino != oldinum) {
|
||||
error = EIDRM; /* Identifier removed */
|
||||
} else if (fp != NULL && fp->f_vnode != vp) {
|
||||
@ -2381,7 +2381,7 @@ kern_statat(struct thread *td, int flag, int fd, const char *path,
|
||||
|
||||
if ((error = namei(&nd)) != 0)
|
||||
return (error);
|
||||
error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
|
||||
error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
|
||||
if (error == 0) {
|
||||
SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode);
|
||||
if (S_ISREG(sbp->st_mode))
|
||||
@ -4566,7 +4566,7 @@ kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
|
||||
vfs_unbusy(mp);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
|
||||
error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
|
||||
vput(vp);
|
||||
return (error);
|
||||
}
|
||||
|
@ -1455,123 +1455,12 @@ vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred,
|
||||
int error;
|
||||
|
||||
vn_lock(vp, LK_SHARED | LK_RETRY);
|
||||
error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
|
||||
error = VOP_STAT(vp, sb, active_cred, fp->f_cred, td);
|
||||
VOP_UNLOCK(vp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Stat a vnode; implementation for the stat syscall
|
||||
*/
|
||||
int
|
||||
vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
|
||||
struct ucred *file_cred, struct thread *td)
|
||||
{
|
||||
struct vattr vattr;
|
||||
struct vattr *vap;
|
||||
int error;
|
||||
u_short mode;
|
||||
|
||||
AUDIT_ARG_VNODE1(vp);
|
||||
#ifdef MAC
|
||||
error = mac_vnode_check_stat(active_cred, file_cred, vp);
|
||||
if (error)
|
||||
return (error);
|
||||
#endif
|
||||
|
||||
vap = &vattr;
|
||||
|
||||
/*
|
||||
* Initialize defaults for new and unusual fields, so that file
|
||||
* systems which don't support these fields don't need to know
|
||||
* about them.
|
||||
*/
|
||||
vap->va_birthtime.tv_sec = -1;
|
||||
vap->va_birthtime.tv_nsec = 0;
|
||||
vap->va_fsid = VNOVAL;
|
||||
vap->va_rdev = NODEV;
|
||||
|
||||
error = VOP_GETATTR(vp, vap, active_cred);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Zero the spare stat fields
|
||||
*/
|
||||
bzero(sb, sizeof *sb);
|
||||
|
||||
/*
|
||||
* Copy from vattr table
|
||||
*/
|
||||
if (vap->va_fsid != VNOVAL)
|
||||
sb->st_dev = vap->va_fsid;
|
||||
else
|
||||
sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
|
||||
sb->st_ino = vap->va_fileid;
|
||||
mode = vap->va_mode;
|
||||
switch (vap->va_type) {
|
||||
case VREG:
|
||||
mode |= S_IFREG;
|
||||
break;
|
||||
case VDIR:
|
||||
mode |= S_IFDIR;
|
||||
break;
|
||||
case VBLK:
|
||||
mode |= S_IFBLK;
|
||||
break;
|
||||
case VCHR:
|
||||
mode |= S_IFCHR;
|
||||
break;
|
||||
case VLNK:
|
||||
mode |= S_IFLNK;
|
||||
break;
|
||||
case VSOCK:
|
||||
mode |= S_IFSOCK;
|
||||
break;
|
||||
case VFIFO:
|
||||
mode |= S_IFIFO;
|
||||
break;
|
||||
default:
|
||||
return (EBADF);
|
||||
}
|
||||
sb->st_mode = mode;
|
||||
sb->st_nlink = vap->va_nlink;
|
||||
sb->st_uid = vap->va_uid;
|
||||
sb->st_gid = vap->va_gid;
|
||||
sb->st_rdev = vap->va_rdev;
|
||||
if (vap->va_size > OFF_MAX)
|
||||
return (EOVERFLOW);
|
||||
sb->st_size = vap->va_size;
|
||||
sb->st_atim.tv_sec = vap->va_atime.tv_sec;
|
||||
sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
|
||||
sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
|
||||
sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
|
||||
sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
|
||||
sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
|
||||
sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
|
||||
sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
|
||||
|
||||
/*
|
||||
* According to www.opengroup.org, the meaning of st_blksize is
|
||||
* "a filesystem-specific preferred I/O block size for this
|
||||
* object. In some filesystem types, this may vary from file
|
||||
* to file"
|
||||
* Use minimum/default of PAGE_SIZE (e.g. for VCHR).
|
||||
*/
|
||||
|
||||
sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
|
||||
|
||||
sb->st_flags = vap->va_flags;
|
||||
if (priv_check_cred_vfs_generation(td->td_ucred))
|
||||
sb->st_gen = 0;
|
||||
else
|
||||
sb->st_gen = vap->va_gen;
|
||||
|
||||
sb->st_blocks = vap->va_bytes / S_BLKSIZE;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* File table vnode ioctl routine.
|
||||
*/
|
||||
|
@ -177,6 +177,17 @@ vop_accessx {
|
||||
};
|
||||
|
||||
|
||||
%% stat vp L L L
|
||||
|
||||
vop_stat {
|
||||
IN struct vnode *vp;
|
||||
OUT struct stat *sb;
|
||||
IN struct ucred *active_cred;
|
||||
IN struct ucred *file_cred;
|
||||
IN struct thread *td;
|
||||
};
|
||||
|
||||
|
||||
%% getattr vp L L L
|
||||
|
||||
vop_getattr {
|
||||
|
@ -854,7 +854,7 @@ audit_arg_upath2_canon(char *upath)
|
||||
* It is assumed that the caller will hold any vnode locks necessary to
|
||||
* perform a VOP_GETATTR() on the passed vnode.
|
||||
*
|
||||
* XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always
|
||||
* XXX: The attr code is very similar to vfs_default.c:vop_stdstat(), but always
|
||||
* provides access to the generation number as we need that to construct the
|
||||
* BSM file ID.
|
||||
*
|
||||
|
@ -737,8 +737,6 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
|
||||
struct thread *td);
|
||||
int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
|
||||
struct thread *td);
|
||||
int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
|
||||
struct ucred *file_cred, struct thread *td);
|
||||
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
|
||||
int vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
|
||||
int flags);
|
||||
@ -893,6 +891,22 @@ void vop_need_inactive_debugpost(void *a, int rc);
|
||||
|
||||
void vop_rename_fail(struct vop_rename_args *ap);
|
||||
|
||||
/*
 * Common entry work for VOP_STAT implementations.
 *
 * Records the vnode for audit and zeroes the caller's stat buffer.  When the
 * kernel is built with MAC, the MAC stat check runs first and the buffer is
 * only zeroed if that check passes.  Evaluates to 0 on success or to the
 * error returned by the MAC check.
 *
 * The MAC call is compiled in only under "#ifdef MAC", so kernels built
 * without MAC do not reference mac_vnode_check_stat().  The argument is
 * captured once in _ap, so it is evaluated a single time and may be an
 * arbitrary expression.
 */
#ifdef MAC
#define	vop_stat_helper_pre(ap)	({						\
	struct vop_stat_args *_ap = (ap);					\
	int _error;								\
	AUDIT_ARG_VNODE1(_ap->a_vp);						\
	_error = mac_vnode_check_stat(_ap->a_active_cred,			\
	    _ap->a_file_cred, _ap->a_vp);					\
	if (__predict_true(_error == 0))					\
		bzero(_ap->a_sb, sizeof(*_ap->a_sb));				\
	_error;									\
})
#else
#define	vop_stat_helper_pre(ap)	({						\
	struct vop_stat_args *_ap = (ap);					\
	AUDIT_ARG_VNODE1(_ap->a_vp);						\
	bzero(_ap->a_sb, sizeof(*_ap->a_sb));					\
	0;									\
})
#endif
|
||||
|
||||
/*
 * Common exit work for VOP_STAT implementations.
 *
 * Resets st_gen in the caller's stat buffer to 0 when
 * priv_check_cred_vfs_generation() returns non-zero for the calling
 * thread's credentials, then evaluates to the error value passed in.
 *
 * Both arguments are captured once in local temporaries, so each is
 * evaluated a single time and may be an arbitrary expression.
 */
#define	vop_stat_helper_post(ap, error)	({					\
	int _error = (error);							\
	struct vop_stat_args *_ap = (ap);					\
	if (priv_check_cred_vfs_generation(_ap->a_td->td_ucred))		\
		_ap->a_sb->st_gen = 0;						\
	_error;									\
})
|
||||
|
||||
#define VOP_WRITE_PRE(ap) \
|
||||
struct vattr va; \
|
||||
int error; \
|
||||
|
Loading…
Reference in New Issue
Block a user