to be allocated at boot time. This is an expensive option, as they consume physical ram and are not pageable etc. In certain situations, this kind of option is quite useful, especially for news servers that access a large number of directories at random and torture the name cache. Defining 5000 or 10000 extra vnodes should cut down the amount of vnode recycling somewhat, which should allow better name and directory caching etc. This is a "your mileage may vary" option, with no real indication of what works best for your machine except trial and error. Too many will cost you ram that you could otherwise use for disk buffers etc. This is based on something John Dyson mentioned to me a while ago.
436 lines
15 KiB
C
436 lines
15 KiB
C
/*
|
|
* Copyright (c) 1989, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)vnode.h 8.7 (Berkeley) 2/4/94
|
|
* $Id: vnode.h,v 1.32 1996/03/29 06:39:39 davidg Exp $
|
|
*/
|
|
|
|
#ifndef _SYS_VNODE_H_
|
|
#define _SYS_VNODE_H_
|
|
|
|
#include <sys/queue.h>
|
|
|
|
/*
|
|
* The vnode is the focus of all file activity in UNIX. There is a
|
|
* unique vnode allocated for each active file, each current directory,
|
|
* each mounted-on file, text file, and the root.
|
|
*/
|
|
|
|
/*
|
|
* Vnode types. VNON means no type.
|
|
*/
|
|
enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
|
|
|
|
/*
|
|
* Vnode tag types.
|
|
* These are for the benefit of external programs only (e.g., pstat)
|
|
* and should NEVER be inspected by the kernel.
|
|
*/
|
|
enum vtagtype {
|
|
VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
|
|
VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
|
|
VT_UNION, VT_MSDOSFS, VT_DEVFS
|
|
};
|
|
|
|
/*
|
|
* Each underlying filesystem allocates its own private area and hangs
|
|
* it from v_data. If non-null, this area is freed in getnewvnode().
|
|
*/
|
|
LIST_HEAD(buflists, buf);
|
|
|
|
typedef int vop_t __P((void *));
|
|
|
|
struct vnode {
|
|
u_long v_flag; /* vnode flags (see below) */
|
|
int v_usecount; /* reference count of users */
|
|
int v_writecount; /* reference count of writers */
|
|
int v_holdcnt; /* page & buffer references */
|
|
daddr_t v_lastr; /* last read (read-ahead) */
|
|
u_long v_id; /* capability identifier */
|
|
struct mount *v_mount; /* ptr to vfs we are in */
|
|
vop_t **v_op; /* vnode operations vector */
|
|
TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
|
|
LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */
|
|
struct buflists v_cleanblkhd; /* clean blocklist head */
|
|
struct buflists v_dirtyblkhd; /* dirty blocklist head */
|
|
long v_numoutput; /* num of writes in progress */
|
|
enum vtype v_type; /* vnode type */
|
|
union {
|
|
struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
|
|
struct socket *vu_socket; /* unix ipc (VSOCK) */
|
|
struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */
|
|
struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
|
|
} v_un;
|
|
struct nqlease *v_lease; /* Soft reference to lease */
|
|
daddr_t v_lastw; /* last write (write cluster) */
|
|
daddr_t v_cstart; /* start block of cluster */
|
|
daddr_t v_lasta; /* last allocation */
|
|
int v_clen; /* length of current cluster */
|
|
int v_ralen; /* Read-ahead length */
|
|
int v_usage; /* Vnode usage counter */
|
|
daddr_t v_maxra; /* last readahead block */
|
|
void *v_object; /* Place to store VM object */
|
|
enum vtagtype v_tag; /* type of underlying data */
|
|
void *v_data; /* private data for fs */
|
|
};
|
|
#define v_mountedhere v_un.vu_mountedhere
|
|
#define v_socket v_un.vu_socket
|
|
#define v_specinfo v_un.vu_specinfo
|
|
#define v_fifoinfo v_un.vu_fifoinfo
|
|
|
|
/*
|
|
* Vnode flags.
|
|
*/
|
|
#define VROOT 0x0001 /* root of its file system */
|
|
#define VTEXT 0x0002 /* vnode is a pure text prototype */
|
|
#define VSYSTEM 0x0004 /* vnode being used by kernel */
|
|
#define VOLOCK 0x0008 /* vnode is locked waiting for an object */
|
|
#define VOWANT 0x0010 /* a process is waiting for VOLOCK */
|
|
#define VXLOCK 0x0100 /* vnode is locked to change underlying type */
|
|
#define VXWANT 0x0200 /* process is waiting for vnode */
|
|
#define VBWAIT 0x0400 /* waiting for output to complete */
|
|
#define VALIASED 0x0800 /* vnode has an alias */
|
|
#define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */
|
|
#define VVMIO 0x2000 /* VMIO flag */
|
|
#define VNINACT 0x4000 /* LFS: skip ufs_inactive() in lfs_vunref */
|
|
#define VAGE 0x8000 /* Insert vnode at head of free list */
|
|
|
|
/*
|
|
* Vnode attributes. A field value of VNOVAL represents a field whose value
|
|
* is unavailable (getattr) or which is not to be changed (setattr).
|
|
*/
|
|
struct vattr {
|
|
enum vtype va_type; /* vnode type (for create) */
|
|
u_short va_mode; /* files access mode and type */
|
|
short va_nlink; /* number of references to file */
|
|
uid_t va_uid; /* owner user id */
|
|
gid_t va_gid; /* owner group id */
|
|
long va_fsid; /* file system id (dev for now) */
|
|
long va_fileid; /* file id */
|
|
u_quad_t va_size; /* file size in bytes */
|
|
long va_blocksize; /* blocksize preferred for i/o */
|
|
struct timespec va_atime; /* time of last access */
|
|
struct timespec va_mtime; /* time of last modification */
|
|
struct timespec va_ctime; /* time file changed */
|
|
u_long va_gen; /* generation number of file */
|
|
u_long va_flags; /* flags defined for file */
|
|
dev_t va_rdev; /* device the special file represents */
|
|
u_quad_t va_bytes; /* bytes of disk space held by file */
|
|
u_quad_t va_filerev; /* file modification number */
|
|
u_int va_vaflags; /* operations flags, see below */
|
|
long va_spare; /* remain quad aligned */
|
|
};
|
|
|
|
/*
|
|
* Flags for va_cflags.
|
|
*/
|
|
#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */
|
|
|
|
/*
|
|
* Flags for ioflag.
|
|
*/
|
|
#define IO_UNIT 0x01 /* do I/O as atomic unit */
|
|
#define IO_APPEND 0x02 /* append write to end */
|
|
#define IO_SYNC 0x04 /* do I/O synchronously */
|
|
#define IO_NODELOCKED 0x08 /* underlying node already locked */
|
|
#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */
|
|
#define IO_VMIO 0x20 /* data already in VMIO space */
|
|
|
|
/*
|
|
* Modes. Some values same as Ixxx entries from inode.h for now.
|
|
*/
|
|
#define VSUID 04000 /* set user id on execution */
|
|
#define VSGID 02000 /* set group id on execution */
|
|
#define VSVTX 01000 /* save swapped text even after use */
|
|
#define VREAD 00400 /* read, write, execute permissions */
|
|
#define VWRITE 00200
|
|
#define VEXEC 00100
|
|
|
|
/*
|
|
* Token indicating no attribute value yet assigned.
|
|
*/
|
|
#define VNOVAL (-1)
|
|
|
|
#ifdef KERNEL
|
|
/*
|
|
* Convert between vnode types and inode formats (since POSIX.1
|
|
* defines mode word of stat structure in terms of inode formats).
|
|
*/
|
|
extern enum vtype iftovt_tab[];
|
|
extern int vttoif_tab[];
|
|
#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
|
|
#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
|
|
#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
|
|
|
|
/*
|
|
* Flags to various vnode functions.
|
|
*/
|
|
#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
|
|
#define FORCECLOSE 0x0002 /* vflush: force file closure */
|
|
#define WRITECLOSE 0x0004 /* vflush: only close writable files */
|
|
#define DOCLOSE 0x0008 /* vclean: close active files */
|
|
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
|
|
#define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */
|
|
|
|
#ifdef DIAGNOSTIC
|
|
#define HOLDRELE(vp) holdrele(vp)
|
|
#define VATTR_NULL(vap) vattr_null(vap)
|
|
#define VHOLD(vp) vhold(vp)
|
|
#define VREF(vp) vref(vp)
|
|
|
|
void holdrele __P((struct vnode *));
|
|
void vhold __P((struct vnode *));
|
|
#else
|
|
#define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */
|
|
#define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
|
|
#define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */
|
|
#define VREF(vp) (vp)->v_usecount++ /* increase reference */
|
|
#endif
|
|
|
|
#define NULLVP ((struct vnode *)NULL)
|
|
|
|
#ifdef VFS_LKM
|
|
#define VNODEOP_SET(f) DATA_SET(MODVNOPS,f)
|
|
#else
|
|
#define VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f)
|
|
#endif
|
|
|
|
/*
|
|
* Global vnode data.
|
|
*/
|
|
extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
|
|
extern int desiredvnodes; /* number of vnodes desired */
|
|
extern int extravnodes; /* extra vnodes to allocate at boot */
|
|
extern int prtactive; /* nonzero to call vprint() */
|
|
extern struct vattr va_null; /* predefined null vattr structure */
|
|
|
|
/*
|
|
* Macro/function to check for client cache inconsistency w.r.t. leasing.
|
|
*/
|
|
#define LEASE_READ 0x1 /* Check lease for readers */
|
|
#define LEASE_WRITE 0x2 /* Check lease for modifiers */
|
|
|
|
extern void (*lease_check) __P((struct vnode *vp, struct proc *p,
|
|
struct ucred *ucred, int flag));
|
|
extern void (*lease_updatetime) __P((int deltat));
|
|
|
|
#ifdef NFS
|
|
#ifdef NQNFS
|
|
#define LEASE_CHECK(vp, p, cred, flag) lease_check((vp), (p), (cred), (flag))
|
|
#define LEASE_UPDATETIME(dt) lease_updatetime(dt)
|
|
#else
|
|
#define LEASE_CHECK(vp, p, cred, flag)
|
|
#define LEASE_UPDATETIME(dt)
|
|
#endif /* NQNFS */
|
|
#else
|
|
#define LEASE_CHECK(vp, p, cred, flag) \
|
|
do { if(lease_check) lease_check((vp), (p), (cred), (flag)); } while(0)
|
|
#define LEASE_UPDATETIME(dt) \
|
|
do { if(lease_updatetime) lease_updatetime(dt); } while(0)
|
|
#endif /* NFS */
|
|
#endif /* KERNEL */
|
|
|
|
|
|
/*
|
|
* Mods for extensibility.
|
|
*/
|
|
|
|
/*
|
|
* Flags for vdesc_flags:
|
|
*/
|
|
#define VDESC_MAX_VPS 16
|
|
/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
|
|
#define VDESC_VP0_WILLRELE 0x0001
|
|
#define VDESC_VP1_WILLRELE 0x0002
|
|
#define VDESC_VP2_WILLRELE 0x0004
|
|
#define VDESC_VP3_WILLRELE 0x0008
|
|
#define VDESC_NOMAP_VPP 0x0100
|
|
#define VDESC_VPP_WILLRELE 0x0200
|
|
|
|
/*
|
|
* VDESC_NO_OFFSET is used to identify the end of the offset list
|
|
* and in places where no such field exists.
|
|
*/
|
|
#define VDESC_NO_OFFSET -1
|
|
|
|
/*
|
|
* This structure describes the vnode operation taking place.
|
|
*/
|
|
struct vnodeop_desc {
|
|
int vdesc_offset; /* offset in vector--first for speed */
|
|
char *vdesc_name; /* a readable name for debugging */
|
|
int vdesc_flags; /* VDESC_* flags */
|
|
|
|
/*
|
|
* These ops are used by bypass routines to map and locate arguments.
|
|
* Creds and procs are not needed in bypass routines, but sometimes
|
|
* they are useful to (for example) transport layers.
|
|
* Nameidata is useful because it has a cred in it.
|
|
*/
|
|
int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */
|
|
int vdesc_vpp_offset; /* return vpp location */
|
|
int vdesc_cred_offset; /* cred location, if any */
|
|
int vdesc_proc_offset; /* proc location, if any */
|
|
int vdesc_componentname_offset; /* if any */
|
|
/*
|
|
* Finally, we've got a list of private data (about each operation)
|
|
* for each transport layer. (Support to manage this list is not
|
|
* yet part of BSD.)
|
|
*/
|
|
caddr_t *vdesc_transports;
|
|
};
|
|
|
|
#ifdef KERNEL
|
|
/*
|
|
* A list of all the operation descs.
|
|
*/
|
|
extern struct vnodeop_desc *vnodeop_descs[];
|
|
|
|
|
|
/*
|
|
* This macro is very helpful in defining those offsets in the vdesc struct.
|
|
*
|
|
* This is stolen from X11R4. I ignored all the fancy stuff for
|
|
* Crays, so if you decide to port this to such a serious machine,
|
|
* you might want to consult Intrinsic.h's XtOffset{,Of,To}.
|
|
*/
|
|
#define VOPARG_OFFSET(p_type,field) \
|
|
((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
|
|
#define VOPARG_OFFSETOF(s_type,field) \
|
|
VOPARG_OFFSET(s_type*,field)
|
|
#define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
|
|
((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET)))
|
|
|
|
|
|
/*
|
|
* This structure is used to configure the new vnodeops vector.
|
|
*/
|
|
struct vnodeopv_entry_desc {
|
|
struct vnodeop_desc *opve_op; /* which operation this is */
|
|
vop_t *opve_impl; /* code implementing this operation */
|
|
};
|
|
struct vnodeopv_desc {
|
|
/* ptr to the ptr to the vector where op should go */
|
|
vop_t ***opv_desc_vector_p;
|
|
struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */
|
|
};
|
|
|
|
/*
|
|
* A default routine which just returns an error.
|
|
*/
|
|
int vn_default_error __P((void));
|
|
|
|
/*
|
|
* A generic structure.
|
|
* This can be used by bypass routines to identify generic arguments.
|
|
*/
|
|
struct vop_generic_args {
|
|
struct vnodeop_desc *a_desc;
|
|
/* other random data follows, presumably */
|
|
};
|
|
|
|
/*
|
|
* VOCALL calls an op given an ops vector. We break it out because BSD's
|
|
* vclean changes the ops vector and then wants to call ops with the old
|
|
* vector.
|
|
*/
|
|
#define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))
|
|
|
|
/*
|
|
* This call works for vnodes in the kernel.
|
|
*/
|
|
#define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))
|
|
#define VDESC(OP) (& __CONCAT(OP,_desc))
|
|
#define VOFFSET(OP) (VDESC(OP)->vdesc_offset)
|
|
|
|
/*
|
|
* Finally, include the default set of vnode operations.
|
|
*/
|
|
#include <vnode_if.h>
|
|
|
|
/*
|
|
* Public vnode manipulation functions.
|
|
*/
|
|
struct componentname;
|
|
struct file;
|
|
struct mount;
|
|
struct nameidata;
|
|
struct proc;
|
|
struct stat;
|
|
struct ucred;
|
|
struct uio;
|
|
struct vattr;
|
|
struct vnode;
|
|
struct vop_bwrite_args;
|
|
|
|
int bdevvp __P((dev_t dev, struct vnode **vpp));
|
|
/* cache_* may belong in namei.h. */
|
|
void cache_enter __P((struct vnode *dvp, struct vnode *vp,
|
|
struct componentname *cnp));
|
|
int cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
|
|
struct componentname *cnp));
|
|
void cache_purge __P((struct vnode *vp));
|
|
void cache_purgevfs __P((struct mount *mp));
|
|
struct vnode *
|
|
checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
|
|
int getnewvnode __P((enum vtagtype tag,
|
|
struct mount *mp, vop_t **vops, struct vnode **vpp));
|
|
void insmntque __P((struct vnode *vp, struct mount *mp));
|
|
void vattr_null __P((struct vattr *vap));
|
|
int vcount __P((struct vnode *vp));
|
|
int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp));
|
|
void vfs_opv_init __P((struct vnodeopv_desc **them));
|
|
int vget __P((struct vnode *vp, int lockflag));
|
|
void vgone __P((struct vnode *vp));
|
|
void vgoneall __P((struct vnode *vp));
|
|
int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
|
|
struct proc *p, int slpflag, int slptimeo));
|
|
int vn_bwrite __P((struct vop_bwrite_args *ap));
|
|
int vn_close __P((struct vnode *vp,
|
|
int flags, struct ucred *cred, struct proc *p));
|
|
int vn_open __P((struct nameidata *ndp, int fmode, int cmode));
|
|
int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
|
|
int len, off_t offset, enum uio_seg segflg, int ioflg,
|
|
struct ucred *cred, int *aresid, struct proc *p));
|
|
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
|
|
void vn_vmio_close __P((struct vnode *vp));
|
|
int vn_vmio_open __P((struct vnode *vp, struct proc *p,
|
|
struct ucred *cred));
|
|
int vn_writechk __P((struct vnode *vp));
|
|
void vprint __P((char *label, struct vnode *vp));
|
|
void vput __P((struct vnode *vp));
|
|
void vref __P((struct vnode *vp));
|
|
void vrele __P((struct vnode *vp));
|
|
#endif /* KERNEL */
|
|
|
|
#endif /* !_SYS_VNODE_H_ */
|