/* $FreeBSD$ */ /* $Id: nfs_vnops.c,v 1.45 2003/11/05 14:59:02 rees Exp $ */ /* * copyright (c) 2003 * the regents of the university of michigan * all rights reserved * * permission is granted to use, copy, create derivative works and redistribute * this software and such derivative works for any purpose, so long as the name * of the university of michigan is not used in any advertising or publicity * pertaining to the use or distribution of this software without specific, * written prior authorization. if the above copyright notice or any other * identification of the university of michigan is included in any copy of any * portion of this software, then the disclaimer below must also be included. * * this software is provided as is, without representation from the university * of michigan as to its fitness for any purpose, and without warranty by the * university of michigan of any kind, either express or implied, including * without limitation the implied warranties of merchantability and fitness for * a particular purpose. the regents of the university of michigan shall not be * liable for any damages, including special, indirect, incidental, or * consequential damages, with respect to any claim arising out of or in * connection with the use of the software, even if it has been or is hereafter * advised of the possibility of such damages. */ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* NFSv4 */ #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. 
It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 *
 * When B_VMIO is not defined, vfs_busy_pages() expands to nothing.
 */
#ifndef B_VMIO
#define vfs_busy_pages(bp, f)
#endif

/*
 * Forward declarations of the static vnode-op handlers and helpers that the
 * operation tables and functions below refer to.
 */
static int	nfsspec_read(struct vop_read_args *);
static int	nfsspec_write(struct vop_write_args *);
static int	nfsfifo_read(struct vop_read_args *);
static int	nfsfifo_write(struct vop_write_args *);
static int	nfsspec_close(struct vop_close_args *);
static int	nfsfifo_close(struct vop_close_args *);
static int	nfs4_flush(struct vnode *, struct ucred *, int, struct thread *,
    int);
static int	nfs4_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
    struct thread *);
static int	nfs4_closerpc(struct vnode *, struct ucred *, struct thread *,
    int);

static int	nfs4_lookup(struct vop_lookup_args *);
static int	nfs4_create(struct vop_create_args *);
static int	nfs4_mknod(struct vop_mknod_args *);
static int	nfs4_open(struct vop_open_args *);
static int	nfs4_close(struct vop_close_args *);
static int	nfs4_access(struct vop_access_args *);
static int	nfs4_getattr(struct vop_getattr_args *);
static int	nfs4_setattr(struct vop_setattr_args *);
static int	nfs4_read(struct vop_read_args *);
static int	nfs4_fsync(struct vop_fsync_args *);
static int	nfs4_remove(struct vop_remove_args *);
static int	nfs4_link(struct vop_link_args *);
static int	nfs4_rename(struct vop_rename_args *);
static int	nfs4_mkdir(struct vop_mkdir_args *);
static int	nfs4_rmdir(struct vop_rmdir_args *);
static int	nfs4_symlink(struct vop_symlink_args *);
static int	nfs4_readdir(struct vop_readdir_args *);
static int	nfs4_strategy(struct vop_strategy_args *);
static int	nfs4_lookitup(struct vnode *, const char *, int, struct ucred *,
    struct thread *, struct nfsnode **);
static int	nfs4_sillyrename(struct vnode *, struct vnode *,
    struct componentname *);
static int	nfsspec_access(struct vop_access_args *);
static int	nfs4_readlink(struct vop_readlink_args *);
static int	nfs4_print(struct vop_print_args *);
static int	nfs4_advlock(struct vop_advlock_args *);

/*
 * Global vfs data structures for nfs
 *
 * Operation vector for ordinary NFSv4 vnodes: each entry pairs a vop
 * descriptor with its handler; anything not listed goes to vop_defaultop.
 * The nfs_getpages, nfs_putpages, nfs_inactive, nfs_reclaim and nfs_write
 * handlers are not declared in this file (presumably shared with the
 * NFSv2/v3 client -- confirm).
 */
vop_t **nfs4_vnodeop_p;
static struct vnodeopv_entry_desc nfs4_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_defaultop },
	{ &vop_access_desc,	(vop_t *) nfs4_access },
	{ &vop_advlock_desc,	(vop_t *) nfs4_advlock },
	{ &vop_close_desc,	(vop_t *) nfs4_close },
	{ &vop_create_desc,	(vop_t *) nfs4_create },
	{ &vop_fsync_desc,	(vop_t *) nfs4_fsync },
	{ &vop_getattr_desc,	(vop_t *) nfs4_getattr },
	{ &vop_getpages_desc,	(vop_t *) nfs_getpages },
	{ &vop_putpages_desc,	(vop_t *) nfs_putpages },
	{ &vop_inactive_desc,	(vop_t *) nfs_inactive },
	{ &vop_lease_desc,	(vop_t *) vop_null },
	{ &vop_link_desc,	(vop_t *) nfs4_link },
	{ &vop_lookup_desc,	(vop_t *) nfs4_lookup },
	{ &vop_mkdir_desc,	(vop_t *) nfs4_mkdir },
	{ &vop_mknod_desc,	(vop_t *) nfs4_mknod },
	{ &vop_open_desc,	(vop_t *) nfs4_open },
	{ &vop_print_desc,	(vop_t *) nfs4_print },
	{ &vop_read_desc,	(vop_t *) nfs4_read },
	{ &vop_readdir_desc,	(vop_t *) nfs4_readdir },
	{ &vop_readlink_desc,	(vop_t *) nfs4_readlink },
	{ &vop_reclaim_desc,	(vop_t *) nfs_reclaim },
	{ &vop_remove_desc,	(vop_t *) nfs4_remove },
	{ &vop_rename_desc,	(vop_t *) nfs4_rename },
	{ &vop_rmdir_desc,	(vop_t *) nfs4_rmdir },
	{ &vop_setattr_desc,	(vop_t *) nfs4_setattr },
	{ &vop_strategy_desc,	(vop_t *) nfs4_strategy },
	{ &vop_symlink_desc,	(vop_t *) nfs4_symlink },
	{ &vop_write_desc,	(vop_t *) nfs_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc nfs4_vnodeop_opv_desc =
	{ &nfs4_vnodeop_p, nfs4_vnodeop_entries };
VNODEOP_SET(nfs4_vnodeop_opv_desc);

/*
 * Special device vnode ops
 *
 * Unlisted operations fall through to spec_vnoperate.
 */
vop_t **spec_nfs4nodeop_p;
static struct vnodeopv_entry_desc nfs4_specop_entries[] = {
	{ &vop_default_desc,	(vop_t *) spec_vnoperate },
	{ &vop_access_desc,	(vop_t *) nfsspec_access },
	{ &vop_close_desc,	(vop_t *) nfsspec_close },
	{ &vop_fsync_desc,	(vop_t *) nfs4_fsync },
	{ &vop_getattr_desc,	(vop_t *) nfs4_getattr },
	{ &vop_inactive_desc,	(vop_t *) nfs_inactive },
	{ &vop_print_desc,	(vop_t *) nfs4_print },
	{ &vop_read_desc,	(vop_t *) nfsspec_read },
	{ &vop_reclaim_desc,	(vop_t *) nfs_reclaim },
	{ &vop_setattr_desc,	(vop_t *) nfs4_setattr },
	{ &vop_write_desc,	(vop_t *) nfsspec_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc spec_nfs4nodeop_opv_desc =
	{ &spec_nfs4nodeop_p, nfs4_specop_entries };
VNODEOP_SET(spec_nfs4nodeop_opv_desc);

/*
 * Fifo vnode ops.  Unlisted operations fall through to fifo_vnoperate.
 */
vop_t **fifo_nfs4nodeop_p;
static struct vnodeopv_entry_desc nfs4_fifoop_entries[] = {
	{ &vop_default_desc,	(vop_t *) fifo_vnoperate },
	{ &vop_access_desc,	(vop_t *) nfsspec_access },
	{ &vop_close_desc,	(vop_t *) nfsfifo_close },
	{ &vop_fsync_desc,	(vop_t *) nfs4_fsync },
	{ &vop_getattr_desc,	(vop_t *) nfs4_getattr },
	{ &vop_inactive_desc,	(vop_t *) nfs_inactive },
	{ &vop_print_desc,	(vop_t *) nfs4_print },
	{ &vop_read_desc,	(vop_t *) nfsfifo_read },
	{ &vop_reclaim_desc,	(vop_t *) nfs_reclaim },
	{ &vop_setattr_desc,	(vop_t *) nfs4_setattr },
	{ &vop_write_desc,	(vop_t *) nfsfifo_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc fifo_nfs4nodeop_opv_desc =
	{ &fifo_nfs4nodeop_p, nfs4_fifoop_entries };
VNODEOP_SET(fifo_nfs4nodeop_opv_desc);

/* Forward declarations of the RPC helpers. */
static int	nfs4_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td);
static int	nfs4_renamerpc(struct vnode *fdvp, const char *fnameptr,
    int fnamelen, struct vnode *tdvp,
    const char *tnameptr, int tnamelen,
    struct ucred *cred, struct thread *td);
static int	nfs4_renameit(struct vnode *sdvp, struct componentname *scnp,
    struct sillyrename *sp);
static int	nfs4_openrpc(struct vnode *, struct vnode **,
    struct componentname *, int, struct vattr *);
static int	nfs4_open_confirm(struct vnode *vp, struct nfs4_compound *cpp,
    struct nfs4_oparg_open *openap,
    struct nfs4_oparg_getfh *gfh,
    struct ucred *cred, struct thread *td);
static int	nfs4_createrpc(struct vnode *, struct vnode **,
    struct componentname *, nfstype,
    struct vattr *, char *);

/*
 * Global variables
 */

/* The single lock owner that nfs4_openrpc() attaches to every open context. */
struct nfs4_lowner nfs4_masterlowner;

/* Size of a struct dirent without its name buffer of MAXNAMLEN + 1 bytes. */
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_nfs4);

/* Lifetime of a cached ACCESS result, compared against time_second. */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/* When non-zero, nfs4_close() asks nfs4_flush() to commit as well as write. */
static int	nfsv3_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs4, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
#if 0
SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_hits, CTLFLAG_RD,
	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_nfs4, OID_AUTO, access_cache_misses, CTLFLAG_RD,
	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

/* Union of every NFSv3 ACCESS permission bit. */
#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)

/*
 * NFSv3 ACCESS "over the wire" helper.
 *
 * Currently disabled: the unconditional return (0) below sits in front of
 * the remaining declarations and statements, so none of the RPC code is
 * reached and the function always reports success without updating the
 * nfsnode's cached access mode.
 *
 * The unreachable code would send an ACCESS request for the mode bits in
 * wmode and, on success, store the mode granted by the server together with
 * the caller's uid and the current time in np->n_mode, np->n_modeuid and
 * np->n_modestamp.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to rely
 * on the locals mb/bpos/md/dpos/mrep/error and on the nfsmout label --
 * confirm before renaming anything here.
 */
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred)
{
	const int v3 = 1;
	u_int32_t *tl;
	int error = 0, attrflag;

	/* Short-circuit: everything after this line is dead code. */
	return (0);

	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	caddr_t bpos, dpos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);

	nfsstats.rpccnt[NFSPROC_ACCESS]++;
	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(wmode);
	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
	nfsm_postop_attr(vp, attrflag);
	if (!error) {
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		rmode = fxdr_unsigned(u_int32_t, *tl);
		np->n_mode = rmode;
		np->n_modeuid = cred->cr_uid;
		np->n_modestamp = time_second;
	}
	m_freem(mrep);
nfsmout:
	return error;
}

/*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
 * For nfs version 3, use the access rpc to check accessibility. If file modes
 * are changed on the server, accesses might still fail later.
*/
/*
 * As implemented here: writes to regular files, directories and symlinks on
 * a read-only mount fail with EROFS; the NFSv3 cached-access path is compiled
 * in but disabled by the constant 0 in its condition; otherwise a compound
 * of PUTFH + ACCESS is sent and EACCES is returned when the mode granted by
 * the server (acc.rmode) lacks any of the requested bits.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to rely
 * on the locals mb/bpos/md/dpos/mrep/error and on the nfsmout label --
 * confirm before renaming anything here.
 */
static int
nfs4_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0;
	u_int32_t mode, wmode;
	int v3 = NFS_ISV3(vp); /* v3 \in v4 */
	struct nfsnode *np = VTONFS(vp);
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct nfs4_compound cp;
	struct nfs4_oparg_access acc;
	struct thread *td = ap->a_td;
	struct ucred *cred = ap->a_cred;

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	/* XXX Disable this for now; needs fixing of _access_otw() */
	if (0 && v3) {
		/* Translate the VREAD/VWRITE/VEXEC request to v3 ACCESS bits. */
		if (ap->a_mode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		} else {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
					 NFSV3ACCESS_DELETE);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
		    (ap->a_cred->cr_uid == np->n_modeuid) &&
		    ((np->n_mode & mode) == mode)) {
			nfsstats.accesscache_hits++;
		} else {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			nfsstats.accesscache_misses++;
			error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
			if (!error) {
				if ((np->n_mode & mode) != mode) {
					error = EACCES;
				}
			}
		}
		return (error);
	}

	/* XXX use generic access code here? */
	/* Translate the VREAD/VWRITE/VEXEC request to NFSv4 ACCESS bits. */
	mode = ap->a_mode & VREAD ? NFSV4ACCESS_READ : 0;
	if (vp->v_type == VDIR) {
		if (ap->a_mode & VWRITE)
			mode |= NFSV4ACCESS_MODIFY | NFSV4ACCESS_EXTEND | NFSV4ACCESS_DELETE;
		if (ap->a_mode & VEXEC)
			mode |= NFSV4ACCESS_LOOKUP;
	} else {
		if (ap->a_mode & VWRITE)
			mode |= NFSV4ACCESS_MODIFY | NFSV4ACCESS_EXTEND;
		if (ap->a_mode & VEXEC)
			mode |= NFSV4ACCESS_EXECUTE;
	}

	nfs_v4initcompound(&cp);
	acc.mode = mode;

	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	/* Request: PUTFH(vp), ACCESS(mode). */
	nfsm_v4build_compound(&cp, "nfs4_access()");
	nfsm_v4build_putfh(&cp, vp);
	nfsm_v4build_access(&cp, &acc);
	nfsm_v4build_finalize(&cp);

	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_access(&cp, &acc);

	/* Every requested bit must be present in the reply mode. */
	if ((acc.rmode & mode) != mode)
		error = EACCES;

 nfsmout:
	error = nfs_v4postop(&cp, error);

	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}

/*
 * Send an OPEN compound (PUTFH of dvp, OPEN, GETATTR, GETFH) for the name in
 * cnp.
 *
 * If *vpp is NULL a new file is being created: a temporary open context is
 * used for the request and, on success, a new nfsnode is obtained from the
 * returned file handle, given the parent/name information and a copy of the
 * open context, and returned through *vpp.  If *vpp is non-NULL the node's
 * own write or read context is used, chosen by FWRITE in flags.
 *
 * Each context is opened on the server only once: if its reference count
 * was already non-zero the count is bumped and 0 is returned without an RPC.
 *
 * NOTE(review): the reference count is incremented before the RPC and is not
 * rolled back on the error paths visible here -- confirm whether callers
 * compensate.
 * NOTE(review): the value of nfs_v4postop() after the dissect step is
 * overwritten by the later assignments to error -- confirm this is intended.
 */
static int
nfs4_openrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    int flags, struct vattr *vap)
{
	struct vnode *vp = *vpp;
	struct nfs4_oparg_getattr getattr;
	struct nfs4_oparg_getfh getfh;
	struct nfs4_oparg_open opena;
	struct nfs4_compound cp;
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct ucred *cred = cnp->cn_cred;
	struct thread *td = cnp->cn_thread;
	struct nfs4_fctx xfc, *fcp;
	struct nfsnode *np;

	if (vp == NULL) {
		/* Create a new file */
		np = NULL;
		fcp = &xfc;
		bzero(fcp, sizeof(*fcp));
	} else {
		np = VTONFS(vp);
		fcp = flags & FWRITE ? &np->n_wfc : &np->n_rfc;
	}

	/*
	 * Since we are currently only one lockowner; we only open the
	 * file one each for reading and writing.
	 */
	if (fcp->refcnt++ != 0) {
		*vpp = vp;
		/*printf("not opening %s\n", np->n_name != NULL ? np->n_name : "");*/
		return (0);
	}

	fcp->lop = &nfs4_masterlowner;
	fcp->pid = cnp->cn_thread->td_proc->p_pid;
	fcp->np = np;

	nfs_v4initcompound(&cp);
	cp.nmp = VFSTONFS(dvp->v_mount);

	opena.ctype = NCLNULL;
	opena.flags = flags;
	opena.vap = vap;
	opena.fcp = fcp;		/* For lockowner */
	opena.cnp = cnp;

	getattr.bm = &nfsv4_getattrbm;

	/* Note: vp may be NULL here when a new file is being created. */
	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	nfsm_v4build_compound(&cp, "nfs4_openrpc()");
	nfsm_v4build_putfh(&cp, dvp);
	nfsm_v4build_open(&cp, &opena);
	nfsm_v4build_getattr(&cp, &getattr);
	nfsm_v4build_getfh(&cp, &getfh);
	nfsm_v4build_finalize(&cp);

	nfsm_request(vp != NULL ? vp : dvp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_open(&cp, &opena);
	nfsm_v4dissect_getattr(&cp, &getattr);
	nfsm_v4dissect_getfh(&cp, &getfh);

	error = nfs_v4postop(&cp, error);

	/* The reply flags say whether a separate OPEN_CONFIRM is required. */
	if (opena.rflags & NFSV4OPENRES_CONFIRM) {
		error = nfs4_open_confirm(vp ? vp : dvp, &cp, &opena, &getfh, cred, td);
		if (error != 0)
			goto nfsmout;
	}

	if (vp == NULL) {
		/* New file */
		error = nfs_nget(dvp->v_mount, &getfh.fh_val,
		    getfh.fh_len, &np);
		if (error != 0)
			goto nfsmout;

		vp = NFSTOV(np);
		/* Record parent and name; nfs4_open() reads them back later. */
		np->n_dvp = dvp;
		np->n_namelen = cnp->cn_namelen; /* XXX memory leaks on these; track! */
		if (np->n_name != NULL)
			FREE(np->n_name, M_NFSREQ);
		MALLOC(np->n_name, u_char *,
		    np->n_namelen + 1, M_NFSREQ, M_WAITOK);
		bcopy(cnp->cn_nameptr, np->n_name, np->n_namelen);
		np->n_name[np->n_namelen] = '\0';
		/* Move the temporary open context into the new node. */
		if (flags & FWRITE)
			np->n_wfc = *fcp;
		else
			np->n_rfc = *fcp;

		/*printf("opened new file %s\n", np->n_name);*/

		nfs4_vnop_loadattrcache(vp, &getattr.fa, NULL);
		*vpp = vp;
	} else {
		/*printf("openend \"old\" %s\n", np->n_name != NULL ? np->n_name : "");*/

		/* Truncate on open by setting the size to 0 with SETATTR. */
		if (flags & O_TRUNC && np->n_size != 0) {
			struct vattr va;

			VATTR_NULL(&va);
			va.va_size = 0;
			error = nfs4_setattrrpc(vp, &va,
			    cnp->cn_cred, cnp->cn_thread);
		}
		/* Zero the attribute cache timestamp. */
		np->n_attrstamp = 0;
	}

 nfsmout:
	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}

/*
 * Send an OPEN_CONFIRM compound (PUTFH of the handle in gfh, OPEN_CONFIRM)
 * for the open described by openap.  The caller's compound structure cpp is
 * re-initialised and reused for this request.  Returns the value produced by
 * nfs_v4postop().
 */
static int
nfs4_open_confirm(struct vnode *vp, struct nfs4_compound *cpp,
    struct nfs4_oparg_open *openap, struct nfs4_oparg_getfh *gfh,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;

	nfs_v4initcompound(cpp);
	cpp->nmp = VFSTONFS(vp->v_mount);

	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	nfsm_v4build_compound(cpp, "nfs4_open_confirm()");
	nfsm_v4build_putfh_nv(cpp, gfh);
	nfsm_v4build_open_confirm(cpp, openap);
	nfsm_v4build_finalize(cpp);

	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(cpp);
	nfsm_v4dissect_putfh(cpp);
	nfsm_v4dissect_open_confirm(cpp, openap);

 nfsmout:
	error = nfs_v4postop(cpp, error);

	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}


/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
*/
/*
 * As implemented here: directories and symlinks open trivially (0), any
 * other non-regular type fails with EACCES.  A regular file flagged NCREATED
 * only has that flag cleared.  Otherwise a componentname is rebuilt from the
 * parent/name saved in the nfsnode and the open is delegated to
 * nfs4_openrpc().
 */
/* ARGSUSED */
static int
nfs4_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	enum vtype vtype = vp->v_type;
	int mode = ap->a_mode;
	struct componentname cn;

	if (vtype != VREG) {
		if (vtype != VDIR && vtype != VLNK) {
#ifdef DIAGNOSTIC
			printf("open eacces vtyp=%d\n", vp->v_type);
#endif
			return (EACCES);
		} else
			return (0);
	}

	/* NCREATED is consumed here and no open RPC is sent. */
	if (np->n_flag & NCREATED) {
		np->n_flag &= ~NCREATED;
		return (0);
	}

	/*
	 * Only these four fields of cn are initialised.
	 * NOTE(review): confirm the open path reads no other member.
	 */
	cn.cn_nameptr = np->n_name;
	cn.cn_namelen = np->n_namelen;
	cn.cn_cred = ap->a_cred;
	cn.cn_thread = ap->a_td;

	return (nfs4_openrpc(np->n_dvp, &vp, &cn, mode, NULL));
}

/*
 * Drop one reference on the node's write or read open context (chosen by
 * FWRITE in flags) and, when the count reaches zero, send a compound of
 * PUTFH + CLOSE for it.  Returns 0 without an RPC while references remain,
 * otherwise the value produced by nfs_v4postop().
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to rely
 * on the locals mb/bpos/md/dpos/mrep/error and on the nfsmout label --
 * confirm before renaming anything here.
 */
static int
nfs4_closerpc(struct vnode *vp, struct ucred *cred, struct thread *td, int flags)
{
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct nfs4_fctx *fcp;
	struct nfs4_compound cp;
	struct nfsnode *np = VTONFS(vp);

	fcp = flags & FWRITE ? &np->n_wfc : &np->n_rfc;

	nfs_v4initcompound(&cp);

	/* Other opens still share this context: nothing to send yet. */
	if (--fcp->refcnt != 0)
		return (0);

	/*printf("closing %s\n", np->n_name != NULL ? np->n_name : "");*/

	cp.fcp = fcp;

	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	nfsm_v4build_compound(&cp, "nfs4_closerpc()");
	nfsm_v4build_putfh(&cp, vp);
	nfsm_v4build_close(&cp, fcp);
	nfsm_v4build_finalize(&cp);

	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_close(&cp, fcp);

 nfsmout:
	error = nfs_v4postop(&cp, error);

	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
* 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. */ /* ARGSUSED */ static int nfs4_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type != VREG) return (0); if (np->n_flag & NMODIFIED) { if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to * dispose of. We must flush them to the NFS * server. We have the option of waiting all * the way through the commit rpc or just * waiting for the initial write. The default * is to only wait through the initial write * so the data is in the server's cache, which * is roughly similar to the state a standard * disk subsystem leaves the file in on * close(). * * We cannot clear the NMODIFIED bit in * np->n_flag due to potential races with * other processes, and certainly cannot clear * it if we don't commit. */ int cm = nfsv3_commit_on_close ? 
1 : 0; error = nfs4_flush(vp, ap->a_cred, MNT_WAIT, ap->a_td, cm); /* np->n_flag &= ~NMODIFIED; */ } else { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1); VOP_UNLOCK(vp, 0, ap->a_td); } np->n_attrstamp = 0; } error = nfs4_closerpc(vp, ap->a_cred, ap->a_td, ap->a_fflag); if (!error && np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } return (error); } /* * nfs getattr call from vfs. */ static int nfs4_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfs4_oparg_getattr ga; struct nfs4_compound cp; /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, NFSX_FH(1)); mb = mreq; bpos = mtod(mb, caddr_t); ga.bm = &nfsv4_getattrbm; nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_getattr()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_getattr(&cp, &ga); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, ap->a_td, ap->a_cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_getattr(&cp, &ga); nfs4_vnop_loadattrcache(vp, &ga.fa, ap->a_vap); nfsmout: error = nfs_v4postop(&cp, error); if (mrep != NULL) m_freem(mrep); return (error); } /* * nfs setattr call. */ static int nfs4_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags is not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. 
*/ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ tsize = np->n_size; error = nfs_meta_setsize(vp, ap->a_cred, ap->a_td, vap->va_size); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_td, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1); if (error) { vnode_pager_setsize(vp, np->n_size); return (error); } } /* * np->n_size has already been set to vap->va_size * in nfs_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * nfs_meta_setsize() and could modify np->n_size. */ np->n_vattr.va_size = np->n_size = vap->va_size; }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1)) == EINTR) return (error); error = nfs4_setattrrpc(vp, vap, ap->a_cred, ap->a_td); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
*/
/*
 * Sends a compound of PUTFH + SETATTR + GETATTR for vp, using the node's
 * write open context (n_wfc) for the SETATTR, and loads the returned
 * attributes into the attribute cache.  If the node carries NTRUNCATE, the
 * write context is then closed with nfs4_closerpc() and the flag cleared.
 * Returns the value produced by nfs_v4postop().
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to rely
 * on the locals mb/bpos/md/dpos/mrep/error and on the nfsmout label --
 * confirm before renaming anything here.
 */
static int
nfs4_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct nfs4_compound cp;
	struct nfs4_oparg_getattr ga;
	struct nfsnode *np = VTONFS(vp);
	struct nfs4_fctx *fcp = &np->n_wfc;

	nfsstats.rpccnt[NFSPROC_SETATTR]++;
	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	ga.bm = &nfsv4_getattrbm;
	nfs_v4initcompound(&cp);

	nfsm_v4build_compound(&cp, "nfs4_setattrrpc");
	nfsm_v4build_putfh(&cp, vp);
	nfsm_v4build_setattr(&cp, vap, fcp);
	nfsm_v4build_getattr(&cp, &ga);
	nfsm_v4build_finalize(&cp);

	nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_setattr(&cp);
	nfsm_v4dissect_getattr(&cp, &ga);

	/* Refresh the cached attributes from the post-SETATTR GETATTR. */
	nfs4_vnop_loadattrcache(vp, &ga.fa, NULL);

	/* XXX -- need to implement this in nfs4_setattr*/
	if (np->n_flag & NTRUNCATE) {
		error = nfs4_closerpc(vp, cred, td, FWRITE);
		np->n_flag &= ~NTRUNCATE;
	}

nfsmout:
	error = nfs_v4postop(&cp, error);

	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}

/*
 * nfs lookup call, one step at a time...
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs4_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; int isdot, flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, fhsize; struct thread *td = cnp->cn_thread; struct nfs4_compound cp; struct nfs4_oparg_getattr ga, dga; struct nfs4_oparg_lookup l; struct nfs4_oparg_getfh gfh; *vpp = NULLVP; cnp->cn_flags &= ~PDIRUNLOCK; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); isdot = cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.'; if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { *vpp = NULLVP; return (error); } vhold(*vpp); newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, td); cnp->cn_flags |= PDIRUNLOCK; error = vget(newvp, LK_EXCLUSIVE, td); if (!error && lockparent && (flags & ISLASTCN)) { error = vn_lock(dvp, LK_EXCLUSIVE, td); if (error == 0) cnp->cn_flags &= ~PDIRUNLOCK; } } else { error = vget(newvp, LK_EXCLUSIVE, td); if (!lockparent || error || !(flags & ISLASTCN)) { VOP_UNLOCK(dvp, 0, td); cnp->cn_flags |= PDIRUNLOCK; } } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { 
nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; vdrop(newvp); return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, td); } vdrop(newvp); error = vn_lock(dvp, LK_EXCLUSIVE, td); *vpp = NULLVP; if (error) { cnp->cn_flags |= PDIRUNLOCK; return (error); } cnp->cn_flags &= ~PDIRUNLOCK; } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); ga.bm = &nfsv4_getattrbm; dga.bm = &nfsv4_getattrbm; nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_lookup()"); nfsm_v4build_putfh(&cp, dvp); nfsm_v4build_getattr(&cp, &dga); if (flags & ISDOTDOT) nfsm_v4build_lookupp(&cp); else if (!isdot) { l.name = cnp->cn_nameptr; l.namelen = len; nfsm_v4build_lookup(&cp, &l); } nfsm_v4build_getattr(&cp, &ga); nfsm_v4build_getfh(&cp, &gfh); nfsm_v4build_finalize(&cp); nfsm_request(dvp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_getattr(&cp, &dga); if (flags & ISDOTDOT) nfsm_v4dissect_lookupp(&cp); else if (!isdot) nfsm_v4dissect_lookup(&cp); nfsm_v4dissect_getattr(&cp, &ga); nfsm_v4dissect_getfh(&cp, &gfh); nfs4_vnop_loadattrcache(dvp, &dga.fa, NULL); fhp = &gfh.fh_val; fhsize = gfh.fh_len; /* * Handle RENAME case... 
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) return (EISDIR); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) return (error); newvp = NFSTOV(np); nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); *vpp = newvp; cnp->cn_flags |= SAVENAME; if (!lockparent) { VOP_UNLOCK(dvp, 0, td); cnp->cn_flags |= PDIRUNLOCK; } return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, td); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); return (error); } newvp = NFSTOV(np); nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); if (lockparent && (flags & ISLASTCN)) { error = vn_lock(dvp, LK_EXCLUSIVE, td); if (error) { cnp->cn_flags |= PDIRUNLOCK; vput(newvp); return (error); } } else cnp->cn_flags |= PDIRUNLOCK; } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) { cnp->cn_flags |= PDIRUNLOCK; VOP_UNLOCK(dvp, 0, td); } newvp = NFSTOV(np); /* Fill in np used by open. 
*/
		np->n_dvp = dvp;
		np->n_namelen = cnp->cn_namelen;
		if (np->n_name != NULL)
			FREE(np->n_name, M_NFSREQ);
		MALLOC(np->n_name, u_char *, np->n_namelen + 1, M_NFSREQ,
		    M_WAITOK);
		bcopy(cnp->cn_nameptr, np->n_name, np->n_namelen);
		np->n_name[np->n_namelen] = '\0';

		nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL);
	}

	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	m_freem(mrep);
nfsmout:
	error = nfs_v4postop(&cp, error);

	if (error) {
		if (newvp != NULLVP) {
			vrele(newvp);
			*vpp = NULLVP;
		}
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN) && error == ENOENT) {
			if (!lockparent) {
				VOP_UNLOCK(dvp, 0, td);
				cnp->cn_flags |= PDIRUNLOCK;
			}
			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
				error = EROFS;
			else
				error = EJUSTRETURN;
		}
		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
			cnp->cn_flags |= SAVENAME;
	}

	return (error);
}

/*
 * nfs read call.
 * Just call nfs_bioread() to do the work.
 *
 * Regular files are read through nfs_bioread(); a directory yields EISDIR
 * and every other vnode type yields EOPNOTSUPP.
 */
static int
nfs4_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 *
 * Returns EINVAL unless the vnode is a symbolic link (VLNK); otherwise the
 * link contents are fetched via nfs_bioread() with no I/O flags.
 */
static int
nfs4_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);

	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by nfs_doio() from below the buffer cache.
*/ int nfs4_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfs4_compound cp; nfsstats.rpccnt[NFSPROC_READLINK]++; mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_readlinkrpc()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_readlink(&cp); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_readlink(&cp, uiop); nfsmout: error = nfs_v4postop(&cp, error); if (m_freem != NULL) m_freem(mrep); return (error); } /* * nfs read rpc call * Ditto above */ int nfs4_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfsmount *nmp; int error = 0, len, tsiz; struct nfs4_compound cp; struct nfs4_oparg_read read; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); if (tsiz == 0) return (0); read.uiop = uiop; read.fcp = np->n_rfc.refcnt > 0 ? &np->n_rfc : &np->n_wfc; while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? 
nmp->nm_rsize : tsiz; read.off = uiop->uio_offset; read.maxcnt = len; nfs_v4initcompound(&cp); mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_v4build_compound(&cp, "nfs4_readrpc()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_read(&cp, &read); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); if (error != 0) { error = nfs_v4postop(&cp, error); goto nfsmout; } nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_read(&cp, &read); if (read.eof || read.retlen == 0) tsiz = 0; else tsiz -= read.retlen; error = nfs_v4postop(&cp, error); m_freem(mrep); mrep = NULL; } nfsmout: if (mrep != NULL) m_freem(mrep); return (error); } /* * nfs write call */ int nfs4_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit) { int32_t backup; caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = 1, rlen; struct nfs4_compound cp; struct nfs4_oparg_write write; nfsv4stablehow commit, committed = NSHFILESYNC; caddr_t verf; struct nfsnode *np = VTONFS(vp); #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); if (tsiz == 0) return (0); write.stable = (nfsv4stablehow)*iomode; write.uiop = uiop; write.fcp = &np->n_wfc; while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; write.off = uiop->uio_offset; write.cnt = len; nfs_v4initcompound(&cp); mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_v4build_compound(&cp, "nfs4_writerpc()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_write(&cp, &write); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); if (error != 0) { error = nfs_v4postop(&cp, error); goto nfsmout; } nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_write(&cp, &write); rlen = write.retlen; if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; (char *)uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = write.committed; if (committed == NSHFILESYNC || (committed = NSHDATASYNC && commit == NSHUNSTABLE)) committed = commit; verf = (caddr_t)write.wverf; if ((nmp->nm_flag & NFSSTA_HASWRITEVERF) == 0) { bcopy(verf, nmp->nm_verf, NFSX_V4VERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp(verf, nmp->nm_verf, NFSX_V4VERF)) { *must_commit = 1; bcopy(verf, nmp->nm_verf, NFSX_V4VERF); } /* XXX wccflag */ if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; error = nfs_v4postop(&cp, error); m_freem(mrep); mrep = NULL; if (error) break; tsiz -= len; } nfsmout: if (mrep != NULL) m_freem(mrep); *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* ARGSUSED */ static int nfs4_mknod(struct vop_mknod_args *ap) { struct vattr *vap = ap->a_vap; struct vnode *newvp = NULL; int error; error = nfs4_createrpc(ap->a_dvp, &newvp, ap->a_cnp, (nfstype)vap->va_type, vap, NULL); /* XXX - is this actually referenced here? 
*/ if (error == 0) { *ap->a_vpp = newvp; vrele(newvp); } return (error); } static int nfs4_createrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, nfstype ftype, struct vattr *vap, char *linktarget) { struct nfsnode *dnp = VTONFS(dvp); struct nfsnode *np = NULL; struct vnode *newvp = NULL; struct nfs4_compound cp; struct nfs4_oparg_create c; struct nfs4_oparg_getattr ga; struct nfs4_oparg_getfh gfh; caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; int error = 0; nfsstats.rpccnt[NFSPROC_CREATE]++; mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); bzero(&c, sizeof(c)); bzero(&ga, sizeof(ga)); c.type = ftype; c.vap = vap; c.linktext = linktarget; c.name = cnp->cn_nameptr; c.namelen = cnp->cn_namelen; ga.bm = &nfsv4_getattrbm; nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_createrpc()"); nfsm_v4build_putfh(&cp, dvp); nfsm_v4build_create(&cp, &c); nfsm_v4build_getattr(&cp, &ga); nfsm_v4build_getfh(&cp, &gfh); nfsm_v4build_finalize(&cp); nfsm_request(dvp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_create(&cp, &c); nfsm_v4dissect_getattr(&cp, &ga); nfsm_v4dissect_getfh(&cp, &gfh); error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np); if (error != 0) goto nfsmout; newvp = NFSTOV(np); nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); dnp->n_flag |= NMODIFIED; dnp->n_attrstamp = 0; nfsmout: error = nfs_v4postop(&cp, error); if (mrep != NULL) m_freem(mrep); /* XXX */ /*FREE(cnp->cn_pnbuf, M_NAMEI);*/ if (error != 0 && newvp != NULL) vrele(newvp); else if (error == 0) *vpp = newvp; return (error); } static int nfs4_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { struct nfsnode *fnp = VTONFS(fdvp), *tnp = 
VTONFS(tdvp); caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfs4_compound cp; struct nfs4_oparg_rename r; int error = 0; nfsstats.rpccnt[NFSPROC_RENAME]++; r.fname = fnameptr; r.fnamelen = fnamelen; r.tname = tnameptr; r.tnamelen = tnamelen; nfs_v4initcompound(&cp); mreq = nfsm_reqhead(fdvp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_v4build_compound(&cp, "nfs4_renamerpc()"); nfsm_v4build_putfh(&cp, fdvp); nfsm_v4build_savefh(&cp); nfsm_v4build_putfh(&cp, tdvp); nfsm_v4build_rename(&cp, &r); nfsm_v4build_finalize(&cp); nfsm_request(fdvp, NFSV4PROC_COMPOUND, td, cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_savefh(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_rename(&cp); /* XXX should this always be performed? */ fnp->n_flag |= NMODIFIED; tnp->n_flag |= NMODIFIED; fnp->n_attrstamp = tnp->n_attrstamp = 0; nfsmout: error = nfs_v4postop(&cp, error); if (mrep != NULL) m_freem(mrep); return (error); } /* * nfs file create call */ static int nfs4_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct nfsnode *dnp = VTONFS(dvp); struct componentname *cnp = ap->a_cnp; struct vnode *newvp = NULL; int error = 0, fmode = (O_CREAT | FREAD | FWRITE); struct vattr vattr; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; error = nfs4_openrpc(dvp, &newvp, cnp, fmode, vap); if (error != 0) goto out; VTONFS(newvp)->n_flag |= NCREATED; if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; dnp->n_flag |= NMODIFIED; dnp->n_attrstamp = 0; /* XXX; wccflag */ out: return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs4_sillyrename() to set it up * else * do the remove rpc */ static int nfs4_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs4_remove: no name"); if (vrefcnt(vp) < 1) panic("nfs4_remove: bad v_usecount"); #endif if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_thread, 1); /* Do the rpc */ if (error != EINTR) error = nfs4_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs4_sillyrename(dvp, vp, cnp); np->n_attrstamp = 0; return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs4_removeit(struct sillyrename *sp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs4_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs4_remove() and nfs4_removeit(). 
*/
/*
 * Sends PUTFH(dvp) + REMOVE(name) and, whatever the outcome, marks the
 * directory node modified and clears its attribute stamp.
 * NOTE(review): the nfsm_* macros appear to rely on the local names mreq,
 * mrep, md, mb, bpos, dpos and error and to jump to nfsmout on failure;
 * confirm before renaming any of them.
 */
static int
nfs4_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct nfs4_compound cp;

	nfsstats.rpccnt[NFSPROC_REMOVE]++;

	mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	nfs_v4initcompound(&cp);

	nfsm_v4build_compound(&cp, "nfs4_removerpc()");
	nfsm_v4build_putfh(&cp, dvp);
	nfsm_v4build_remove(&cp, name, namelen);
	nfsm_v4build_finalize(&cp);

	nfsm_request(dvp, NFSV4PROC_COMPOUND, td, cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_remove(&cp);

nfsmout:
	error = nfs_v4postop(&cp, error);

	if (mrep != NULL)
		m_freem(mrep);

	VTONFS(dvp)->n_flag |= NMODIFIED;
	VTONFS(dvp)->n_attrstamp = 0;	/* XXX wccflag */

	return (error);
}

/*
 * nfs file rename call
 *
 * Rejects cross-mount renames with EXDEV, flushes dirty data of the source
 * (and target, if any), silly-renames an in-use non-directory target, then
 * issues the rename rpc.  All four vnodes passed in are released at "out".
 */
static int
nfs4_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int error;

#ifndef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs4_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		printf("nfs4_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 */
	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
	if (tvp)
		VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_thread);

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	/* On a successful sillyrename the target is released and forgotten. */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs4_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs4_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs4_remove() above
 *
 * Renames the component named by scnp to the silly name stored in sp,
 * within the same directory sdvp.
 */
static int
nfs4_renameit(struct vnode *sdvp, struct componentname *scnp,
    struct sillyrename *sp)
{
	return (nfs4_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
}

/*
 * nfs hard link create call
 *
 * Returns EXDEV for cross-mount links.  Otherwise sends PUTFH(vp), SAVEFH,
 * PUTFH(tdvp), LINK and, on success, marks the target directory modified
 * and clears the attribute stamps of both nodes.
 */
static int
nfs4_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep = NULL, *md, *mb;
	struct nfs4_compound cp;
	struct nfs4_oparg_link l;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_thread);

	nfsstats.rpccnt[NFSPROC_LINK]++;

	l.name = cnp->cn_nameptr;
	l.namelen = cnp->cn_namelen;
	nfs_v4initcompound(&cp);

	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0);
	mb = mreq;
	bpos = mtod(mb, caddr_t);

	nfsm_v4build_compound(&cp, "nfs4_link()");
	nfsm_v4build_putfh(&cp, vp);
	nfsm_v4build_savefh(&cp);
	nfsm_v4build_putfh(&cp, tdvp);
	nfsm_v4build_link(&cp, &l);
	nfsm_v4build_finalize(&cp);

	nfsm_request(vp, NFSV4PROC_COMPOUND, cnp->cn_thread, cnp->cn_cred);
	if (error != 0)
		goto nfsmout;

	nfsm_v4dissect_compound(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_savefh(&cp);
	nfsm_v4dissect_putfh(&cp);
	nfsm_v4dissect_link(&cp);

	VTONFS(tdvp)->n_flag |= NMODIFIED;
	VTONFS(vp)->n_attrstamp = 0;
	VTONFS(tdvp)->n_attrstamp = 0;

nfsmout:
	error = nfs_v4postop(&cp, error);

	if (mrep != NULL)
		m_freem(mrep);

	return (error);
}

/*
 * nfs symbolic link create call
 *
 * Delegates to nfs4_createrpc() with type NFLNK and the link target, then
 * marks the directory modified regardless of the result.
 */
static int
nfs4_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	int error = 0;
	struct vnode *newvp = NULL;

	nfsstats.rpccnt[NFSPROC_SYMLINK]++;

	error = nfs4_createrpc(ap->a_dvp, &newvp, ap->a_cnp, NFLNK,
	    ap->a_vap, ap->a_target);

	if (error != 0 && newvp != NULL)
		vput(newvp);
	else if (error == 0)
		*ap->a_vpp = newvp;

	VTONFS(dvp)->n_flag |= NMODIFIED;
	VTONFS(dvp)->n_attrstamp = 0;	/* XXX wccflags */

	return (error);
}

/*
 * nfs make dir call
 *
 * Thin wrapper: nfs4_createrpc() with type NFDIR and no link target.
 */
static int
nfs4_mkdir(struct vop_mkdir_args *ap)
{
	return (nfs4_createrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, NFDIR,
	    ap->a_vap, NULL));
}

/*
 * nfs remove directory call
 *
 * Returns EINVAL when asked to remove the directory from itself; otherwise
 * issues the remove rpc and, on success, marks the parent modified and
 * purges the name cache for both vnodes.
 */
static int
nfs4_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct nfsnode *dnp = VTONFS(dvp);
	struct componentname *cnp = ap->a_cnp;
	int error = 0;

	if (dvp == vp)
		return (EINVAL);

	error = (nfs4_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, NULL));
	if (error)
		return (error);

	dnp->n_flag |= NMODIFIED;
	dnp->n_attrstamp = 0;
	cache_purge(dvp);
	cache_purge(vp);

	return (error);
}

/*
 * nfs readdir
call */ static int nfs4_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } static u_char fty_to_dty[] = { DT_UNKNOWN, /* NFNON */ DT_REG, /* NFREG */ DT_DIR, /* NFDIR */ DT_BLK, /* NFBLK */ DT_CHR, /* NFCHR */ DT_LNK, /* NFLNK */ DT_SOCK, /* NFSOCK */ DT_FIFO, /* NFFIFO */ DT_UNKNOWN, /* NFATTRDIT */ DT_UNKNOWN, /* NFNAMEDATTR */ DT_UNKNOWN, /* NFBAD */ }; /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs4_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; caddr_t p; uint64_t *cookiep; caddr_t bpos, dpos; struct mbuf *mreq, *mrep = NULL, *md, *mb; uint64_t cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; struct nfs4_compound cp; struct nfs4_oparg_readdir readdir; struct nfsv4_fattr fattr; u_int fty; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs4_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* Generate fake entries for "." and ".." 
*/ while (cookie < 2 && bigenough) { cookie++; len = 4 + DIRHDSIZ; if (len > uiop->uio_resid) { bigenough = 0; break; } dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_namlen = cookie; dp->d_reclen = len; dp->d_type = DT_DIR; if (cookie == 1) dp->d_fileno = dnp->n_vattr.va_fileid; /* XXX has problems with pynfs virtualhandles */ else dp->d_fileno = dnp->n_dvp != NULL ? VTONFS(dnp->n_dvp)->n_vattr.va_fileid : cookie; p = dp->d_name; *p++ = '.'; if (cookie == 2) *p++ = '.'; *p = '\0'; blksiz += len; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += len; uiop->uio_resid -= len; (char *)uiop->uio_iov->iov_base += len; uiop->uio_iov->iov_len -= len; } if (cookie == 2) cookie = 0; /* This is sort of ugly, to prevent v4postop() from acting weird */ bzero(&cp, sizeof(cp)); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ /* * XXX this is sort of ugly for nfsv4; we don't maintain the * strict abstraction, but do the decoding inline. that's ok. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); readdir.cnt = nmp->nm_readdirsize; readdir.cookie = cookie; readdir.bm = &nfsv4_readdirbm; if (cookie == 0) bzero(&readdir.verf, sizeof(readdir.verf)); else bcopy(&dnp->n_cookieverf, &readdir.verf, sizeof(readdir.verf)); nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_readdirrpc()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_readdir(&cp, &readdir); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, uiop->uio_td, cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); /* * XXX - Readdir gets handled inline like in * NFSv{2,3}. This is a nasty inconsistency and * should be fixed. 
*/ tl = nfsm_dissect(uint32_t *, 5 * NFSX_UNSIGNED); if (fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_READDIR) { error = EBADRPC; goto nfsmout; } if (fxdr_unsigned(uint32_t, *tl++) != 0) { error = EBADRPC; goto nfsmout; } bcopy(tl, &dnp->n_cookieverf, NFSX_V4VERF); tl += 2; more_dirs = fxdr_unsigned(int, *tl++); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { tl = nfsm_dissect(uint32_t *, 3 * NFSX_UNSIGNED); cookie = fxdr_hyper(tl); tl += 2; /* XXX cookie sanity check */ len = fxdr_unsigned(int, *tl++); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; /* Copy name */ nfsm_mtouio(uiop, len); p = uiop->uio_iov->iov_base; tlen -= len; *p = '\0'; /* null terminate */ /* printf("nfs4_readdirrpc: name: \"%s\" cookie %d\n", p - len, (int) cookie);*/ uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; /* Copy attributes */ nfsm_v4dissect_attrs(&fattr); dp->d_fileno = nfs_v4fileid4_to_fileid( fattr.fa4_valid & FA4V_FILEID && fattr.fa4_fileid ? fattr.fa4_fileid : cookie); fty = (u_int)fattr.fa4_type; dp->d_type = fattr.fa4_valid & FA4V_TYPE && (fty < sizeof(fty_to_dty)) ? 
fty_to_dty[fty] : DT_UNKNOWN; } else nfsm_adv(nfsm_rndup(len)); tl = nfsm_dissect(uint32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl++); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } error = nfs_v4postop(&cp, error); m_freem(mrep); mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs4_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (mrep != NULL) m_freem(mrep); return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... 
*/
static int
nfs4_sillyrename(struct vnode *dvp, struct vnode *vp,
    struct componentname *cnp)
{
	struct nfsnode *np;
	struct sillyrename *sr;
	short pid;
	int error;

	cache_purge(dvp);
	np = VTONFS(vp);
#ifndef DIAGNOSTIC
	if (vp->v_type == VDIR)
		panic("nfs: sillyrename dir");
#endif

	/* Record what is needed to remove the renamed file later on. */
	MALLOC(sr, struct sillyrename *, sizeof (struct sillyrename),
	    M_NFSREQ, M_WAITOK);
	sr->s_cred = crhold(cnp->cn_cred);
	sr->s_dvp = dvp;
	sr->s_removeit = nfs4_removeit;
	VREF(dvp);

	/* Build the candidate name from the pid of the calling process. */
	pid = cnp->cn_thread->td_proc->p_pid;
	sr->s_namlen = sprintf(sr->s_name, ".nfsA%04x4.4", pid);

	/*
	 * Step the fifth character of the name until a lookup fails, i.e.
	 * the name is unused; give up with EINVAL once it passes 'z'.
	 */
	for (;;) {
		if (nfs4_lookitup(dvp, sr->s_name, sr->s_namlen, sr->s_cred,
		    cnp->cn_thread, NULL) != 0)
			break;
		if (++sr->s_name[4] > 'z') {
			error = EINVAL;
			goto bad;
		}
	}

	error = nfs4_renameit(dvp, cnp, sr);
	if (error != 0)
		goto bad;

	/* Refresh the node for the new name; the result is not checked. */
	error = nfs4_lookitup(dvp, sr->s_name, sr->s_namlen, sr->s_cred,
	    cnp->cn_thread, &np);
	np->n_sillyrename = sr;
	return (0);

bad:
	vrele(sr->s_dvp);
	crfree(sr->s_cred);
	free((caddr_t)sr, M_NFSREQ);
	return (error);
}

/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs4_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos; int error = 0, fhlen; struct mbuf *mreq, *mrep = NULL, *md, *mb; nfsfh_t *nfhp; struct nfs4_compound cp; struct nfs4_oparg_lookup l; struct nfs4_oparg_getfh gfh; struct nfs4_oparg_getattr ga; nfsstats.rpccnt[NFSPROC_RENAME]++; mreq = nfsm_reqhead(dvp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); l.name = name; l.namelen = len; nfs_v4initcompound(&cp); ga.bm = &nfsv4_getattrbm; nfsm_v4build_compound(&cp, "nfs4_renamerpc()"); nfsm_v4build_putfh(&cp, dvp); nfsm_v4build_lookup(&cp, &l); nfsm_v4build_getfh(&cp, &gfh); nfsm_v4build_getattr(&cp, &ga); nfsm_request(dvp, NFSV4PROC_COMPOUND, td, cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_lookup(&cp); nfsm_v4dissect_getfh(&cp, &gfh); nfsm_v4dissect_getattr(&cp, &ga); if (npp != NULL && error == 0) { nfhp = &gfh.fh_val; fhlen = gfh.fh_len; if (*npp != NULL) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (newvp != dvp) { np->n_dvp = dvp; np->n_namelen = len; if (np->n_name != NULL) FREE(np->n_name, M_NFSREQ); MALLOC(np->n_name, u_char *, np->n_namelen + 1, M_NFSREQ, 
M_WAITOK); memcpy(np->n_name, name, len); np->n_name[len] = '\0'; } nfs4_vnop_loadattrcache(newvp, &ga.fa, NULL); } nfsmout: error = nfs_v4postop(&cp, error); if (mrep != NULL) m_freem(mrep); if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ int nfs4_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep = NULL, *md, *mb; struct nfs4_compound cp; struct nfs4_oparg_commit commit; if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, 0); mb = mreq; bpos = mtod(mb, caddr_t); commit.start = offset; commit.len = cnt; nfs_v4initcompound(&cp); nfsm_v4build_compound(&cp, "nfs4_commit()"); nfsm_v4build_putfh(&cp, vp); nfsm_v4build_commit(&cp, &commit); nfsm_v4build_finalize(&cp); nfsm_request(vp, NFSV4PROC_COMPOUND, td, cred); if (error != 0) goto nfsmout; nfsm_v4dissect_compound(&cp); nfsm_v4dissect_putfh(&cp); nfsm_v4dissect_commit(&cp, &commit); /* XXX */ /* nfsm_wcc_data(vp, wccflag);*/ if (bcmp(nmp->nm_verf, commit.verf, NFSX_V4VERF)) { bcopy(commit.verf, nmp->nm_verf, NFSX_V4VERF); error = NFSERR_STALEWRITEVERF; } nfsmout: error = nfs_v4postop(&cp, error); if (mrep == NULL) m_freem(mrep); return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. 
*/
static int
nfs4_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp = ap->a_bp;
	struct ucred *cr;
	struct thread *td;
	int error = 0;

	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
	    __func__, ap->a_vp, ap->a_bp->b_vp));
	KASSERT(!(bp->b_flags & B_DONE),
	    ("nfs4_strategy: buffer %p unexpectedly marked B_DONE", bp));
	KASSERT(BUF_REFCNT(bp) > 0,
	    ("nfs4_strategy: buffer %p not locked", bp));

	/* Asynchronous buffers carry no thread context. */
	if (bp->b_flags & B_ASYNC)
		td = NULL;
	else
		td = curthread;	/* XXX */

	/* Pick the credential that matches the I/O direction. */
	if (bp->b_iocmd == BIO_READ)
		cr = bp->b_rcred;
	else
		cr = bp->b_wcred;

	/*
	 * If the op is asynchronous and an i/o daemon is waiting
	 * queue the request, wake it up and wait for completion
	 * otherwise just do it ourselves.
	 */
	if ((bp->b_flags & B_ASYNC) == 0 ||
	    nfs_asyncio(bp, NOCRED, td))
		error = nfs_doio(bp, cr, td);
	return (error);
}

/*
 * fsync vnode op. Just call nfs4_flush() with commit == 1.
 */
/* ARGSUSED */
static int
nfs4_fsync(struct vop_fsync_args *ap)
{
	return (nfs4_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1));
}

/*
 * Flush all the blocks associated with a vnode.
 * 	Walk through the buffer pool and push any dirty pages
 *	associated with the vnode.
 *
 * With commit set, a first pass ("passone") gathers buffers that are
 * B_DELWRI | B_NEEDCOMMIT and commits them on the server; later passes
 * write out the remaining dirty buffers.  With waitfor == MNT_WAIT the
 * routine also sleeps until v_numoutput drains.  Returns 0, EINTR when
 * nfs4_sigintr() reports a pending signal, or a write error saved in the
 * nfsnode.
 */
static int
nfs4_flush(struct vnode *vp, struct ucred *cred, int waitfor,
    struct thread *td, int commit)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
#ifndef NFS_COMMITBVECSIZ
#define NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	if (!commit)
		passone = 0;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, nfs_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV3(vp) && commit) {
		s = splbio();
		/* Drop the vector left over from a previous pass. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		VI_LOCK(vp);
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_REFCNT(bp) == 0 &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs4_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			VI_UNLOCK(vp);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			VI_LOCK(vp);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_vnbufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_vnbufs);
				continue;
			}
			VI_UNLOCK(vp);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			bp->b_flags |= B_WRITEINPROG;
			vfs_busy_pages(bp, 1);

			VI_LOCK(vp);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_vnbufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			/* Grow [off, endoff) to cover this buffer's dirty range. */
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		splx(s);
		VI_UNLOCK(vp);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = nfs4_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = nfs4_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			nfs_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				s = splbio();
				VI_LOCK(vp);
				vp->v_numoutput++;
				VI_UNLOCK(vp);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				splx(s);
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	s = splbio();
	VI_LOCK(vp);
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffer: only wait for it on a synchronous final pass. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
			splx(s);
			if (error == 0)
				panic("nfs4_fsync: inconsistent lock");
			if (error == ENOLCK)
				goto loop;
			if (nfs4_sigintr(nmp, NULL, td)) {
				error = EINTR;
				goto done;
			}
			/* After one interruptible sleep, fall back to a timed one. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs4_fsync: not dirty");
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		VI_UNLOCK(vp);
		bremfree(bp);
		if (passone || !commit)
			bp->b_flags |= B_ASYNC;
		else
			bp->b_flags |= B_ASYNC | B_WRITEINPROG;
		splx(s);
		BUF_WRITE(bp);
		goto loop;
	}
	splx(s);
	if (passone) {
		passone = 0;
		VI_UNLOCK(vp);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			error = msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
			if (error) {
				VI_UNLOCK(vp);
				if (nfs4_sigintr(nmp, NULL, td)) {
					error = EINTR;
					goto done;
				}
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
				VI_LOCK(vp);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
			VI_UNLOCK(vp);
			goto loop;
		}
	}
	VI_UNLOCK(vp);
	/* Report and clear any write error recorded in the nfsnode. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	return (error);
}

/*
 * NFS advisory byte-level locks.
*/ static int nfs4_advlock(struct vop_advlock_args *ap) { return (EPERM); if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { struct nfsnode *np = VTONFS(ap->a_vp); return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } return (nfs_dolock(ap)); } /* * Print out the contents of an nfsnode. */ static int nfs4_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); printf("\tfileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * B_WRITEINPROG isn't set unless the force flag is one and it * handles the B_NEEDCOMMIT flag. * We set B_CACHE if this is a VMIO buffer. */ int nfs4_writebp(struct buf *bp, int force, struct thread *td) { int s; int oldflags = bp->b_flags; #if 0 int retv = 1; off_t off; #endif if (BUF_REFCNT(bp) == 0) panic("bwrite: buffer is not locked???"); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; VI_LOCK(bp->b_vp); bp->b_vp->v_numoutput++; VI_UNLOCK(bp->b_vp); curthread->td_proc->p_stats->p_ru.ru_oublock++; splx(s); /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); if (force) bp->b_flags |= B_WRITEINPROG; BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); VOP_STRATEGY(bp->b_vp, bp); if( (oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp, bp->b_vp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. 
*/ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_td); if (error) return (error); return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode, cred, NULL)); } /* * Read wrapper for special devices. */ static int nfsspec_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. */ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. */ static int nfsspec_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. 
*/ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td); (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td); VOP_UNLOCK(vp, 0, ap->a_td); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); }