diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h index 1fc5f996cab0..02a897101472 100644 --- a/sys/fs/unionfs/union.h +++ b/sys/fs/unionfs/union.h @@ -49,8 +49,8 @@ struct union_args { #define UNMNT_OPMASK 0x0003 struct union_mount { - struct vnode *um_uppervp; - struct vnode *um_lowervp; + struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ + struct vnode *um_lowervp; /* Left unlocked */ struct ucred *um_cred; /* Credentials of user calling mount */ int um_cmode; /* cmask from mount process */ int um_op; /* Operation mode */ @@ -58,6 +58,10 @@ struct union_mount { #ifdef KERNEL +#ifndef DIAGNOSTIC +#define DIAGNOSTIC +#endif + /* * DEFDIRMODE is the mode bits used to create a shadow directory. */ @@ -67,9 +71,14 @@ struct union_mount { #define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) /* - * A cache of vnode references + * A cache of vnode references (hangs off v_data) + * + * Placing un_lock as the first element theoretically allows us to + * use the vop_stdlock functions. However, we need to make sure of + * certain side effects so we will still punch in our own code. */ struct union_node { + struct lock un_lock; LIST_ENTRY(union_node) un_cache; /* Hash chain */ struct vnode *un_vnode; /* Back pointer */ struct vnode *un_uppervp; /* overlaying object */ @@ -79,6 +88,7 @@ struct union_node { char *un_path; /* saved component name */ int un_hash; /* saved un_path hash value */ int un_openl; /* # of opens on lowervp */ + int un_exclcnt; /* exclusive count */ unsigned int un_flags; struct vnode **un_dircache; /* cached union stack */ off_t un_uppersz; /* size of upper object */ @@ -88,14 +98,25 @@ struct union_node { #endif }; -#define UN_WANT 0x01 -#define UN_LOCKED 0x02 -#define UN_ULOCK 0x04 /* Upper node is locked */ -#define UN_KLOCK 0x08 /* Keep upper node locked on vput */ -#define UN_CACHED 0x10 /* In union cache */ +/* + * XXX UN_ULOCK - indicates that the uppervp is locked + * + * UN_CACHED - node is in the union cache + */ + +/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ +#define UN_CACHED 0x10 /* In union cache */ + +/* + * Hash table locking flags + */ + +#define UNVP_WANT 0x01 +#define UNVP_LOCKED 0x02 extern int union_allocvp __P((struct vnode **, struct mount *, - struct vnode *, struct vnode *, + struct vnode *, + struct vnode *, struct componentname *, struct vnode *, struct vnode *, int)); extern int union_freevp __P((struct vnode *)); @@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *, extern void union_removed_upper __P((struct union_node *un)); extern struct vnode *union_lowervp __P((struct vnode *)); extern void union_newsize __P((struct vnode *, off_t, off_t)); +extern void union_vm_coherency __P((struct vnode *, struct uio *, int)); extern int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *)); @@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **, #define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) #define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) +#define UDEBUG(x) if (uniondebug) printf x +#define UDEBUG_ENABLED 1
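UDEBUG() takes a doubly parenthesized argument list so that a single macro argument can carry an entire printf() argument list (this code base predates variadic macros). A minimal usage sketch, assuming a vnode pointer vp is in scope; vfs.uniondebug is the sysctl this patch adds in union_vnops.c:

	UDEBUG(("union: vp %p usecount %d\n", vp, vp->v_usecount));
	/* expands to: if (uniondebug) printf ("union: vp %p usecount %d\n", vp, vp->v_usecount); */

The output can be toggled at run time with sysctl -w vfs.uniondebug=1.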
+ extern vop_t **union_vnodeop_p; extern struct vfsops union_vfsops; +extern int uniondebug; + #endif /* KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index ed09a65fbb4a..c03153c4894b 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -53,6 +53,7 @@ #include #include /* for vnode_pager_setsize */ #include +#include /* for vm cache coherency */ #include #include @@ -97,7 +98,7 @@ union_init() for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); - bzero((caddr_t) unvplock, sizeof(unvplock)); + bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } @@ -105,15 +106,12 @@ static int union_list_lock(ix) int ix; { - - if (unvplock[ix] & UN_LOCKED) { - unvplock[ix] |= UN_WANT; + if (unvplock[ix] & UNVP_LOCKED) { + unvplock[ix] |= UNVP_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } - - unvplock[ix] |= UN_LOCKED; - + unvplock[ix] |= UNVP_LOCKED; return (0); } @@ -121,15 +119,25 @@ static void union_list_unlock(ix) int ix; { + unvplock[ix] &= ~UNVP_LOCKED; - unvplock[ix] &= ~UN_LOCKED; - - if (unvplock[ix] & UN_WANT) { - unvplock[ix] &= ~UN_WANT; + if (unvplock[ix] & UNVP_WANT) { + unvplock[ix] &= ~UNVP_WANT; wakeup((caddr_t) &unvplock[ix]); } } +/* + * union_updatevp: + * + * The uppervp, if not NULL, must be referenced and not locked by us. + * The lowervp, if not NULL, must be referenced. + * + * If uppervp and lowervp match pointers already installed, nothing + * happens. The passed vp's (when matching) are not adjusted. This + * routine may only be called by union_newupper() and union_newlower(). + */ + static void union_updatevp(un, uppervp, lowervp) struct union_node *un; @@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp) uhash = nhash; } - if (lhash != uhash) + if (lhash != uhash) { while (union_list_lock(lhash)) continue; + } while (union_list_lock(uhash)) continue; @@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp) free(un->un_path, M_TEMP); un->un_path = 0; } - if (un->un_dirvp) { - vrele(un->un_dirvp); - un->un_dirvp = NULLVP; - } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; @@ -189,7 +194,6 @@ if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); - un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } @@ -202,21 +206,30 @@ union_list_unlock(nhash); } +/* + * Set a new lowervp. The passed lowervp must be referenced and will be + * stored in the vp in a referenced state. + */ + static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { - union_updatevp(un, un->un_uppervp, lowervp); } +/* + * Set a new uppervp. The passed uppervp must be locked and will be + * stored in the vp in a locked state. The caller should not unlock + * uppervp. + */ + static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { - union_updatevp(un, uppervp, un->un_lowervp); } @@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz) } if (sz != VNOVAL) { -#ifdef DEBUG - printf("union: %s size now %ld\n", - uppersz != VNOVAL ? "upper" : "lower", (long) sz); -#endif + UDEBUG(("union: %s size now %ld\n", + (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); vnode_pager_setsize(vp, sz); } }
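union_list_lock() returns 1 when it had to sleep, because the chain may have been handed to another process in the meantime; ownership is only guaranteed on a 0 return. Hence the retry idiom used by union_updatevp() and union_allocvp(), sketched here:

	/* spin until this hash chain is ours; re-test after every sleep */
	while (union_list_lock(hash))
		continue;
	/* ... safe to walk or modify unhead[hash] here ... */
	union_list_unlock(hash);	/* wakes any UNVP_WANT sleepers */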
/* - * allocate a union_node/vnode pair. the vnode is - * referenced and locked. the new vnode is returned - * via (vpp). (mp) is the mountpoint of the union filesystem, - (dvp) is the parent directory where the upper layer object - should exist (but doesn't) and (cnp) is the componentname - information which is partially copied to allow the upper - layer object to be created at a later time. (uppervp) - and (lowervp) reference the upper and lower layer objects - being mapped. either, but not both, can be nil. - if supplied, (uppervp) is locked. - the reference is either maintained in the new union_node - object which is allocated, or they are vrele'd. + * union_allocvp: allocate a union_node and associate it with a + * parent union_node and one or two vnodes. + * + * vpp Holds the returned vnode locked and referenced if no + * error occurs. + * + * mp Holds the mount point. mp may or may not be busied. + * allocvp makes no changes to mp. + * + * dvp Holds the parent union_node to the one we wish to create. + * XXX may only be used to traverse an uncopied lowervp-based + * tree? XXX + * + * dvp may or may not be locked. allocvp makes no changes + * to dvp. + * + * upperdvp Holds the parent vnode to uppervp, generally used along + * with path component information to create a shadow of + * lowervp when uppervp does not exist. + * + * upperdvp is referenced but unlocked on entry, and will be + * dereferenced on return. + * + * uppervp Holds the new uppervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * lowervp Holds the new lowervp vnode to be stored in the + * union_node we are allocating. lowervp is referenced but + * not locked, and will be dereferenced on return. + * + * cnp Holds path component information to be coupled with + * lowervp and upperdvp to allow unionfs to create an uppervp + * later on. Only used if lowervp is valid. The contents + * of cnp are only valid for the duration of the call. + * + * docache Determines whether this node should be entered in the + * cache or whether it should be destroyed as soon as possible. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot @@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz) * zero references to it and so it needs to removed from * the vnode free list. */ + int -union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) +union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; - struct vnode *undvp; /* parent union vnode */ - struct vnode *dvp; /* may be null */ + struct vnode *dvp; /* parent union vnode */ + struct vnode *upperdvp; /* parent vnode of uppervp */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ @@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct proc *p = (cnp) ? cnp->cn_proc : curproc; int hash = 0; int vflag; int try; @@ -382,65 +421,76 @@ loop: if (un) { /* - * Obtain a lock on the union_node. - * uppervp is locked, though un->un_uppervp - * may not be. this doesn't break the locking - * hierarchy since in the case that un->un_uppervp - * is not yet locked it will be vrele'd and replaced - * with uppervp. + * Obtain a lock on the union_node. Everything is unlocked + * except for dvp, so check that case. If they match, our + * new un is already locked. 
Otherwise we have to lock our + * new un. + * + * A potential deadlock situation occurs when we are holding + * one lock while trying to get another. We must follow + * strict ordering rules to avoid it. We try to locate dvp + * by scanning up from un_vnode, since the most likely + * scenario is un being under dvp. */ - if ((dvp != NULLVP) && (uppervp == dvp)) { - /* - * Access ``.'', so (un) will already - * be locked. Since this process has - * the lock on (uppervp) no other - * process can hold the lock on (un). - */ -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: . not locked"); - else if (curproc && un->un_pid != curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: allocvp not lock owner"); -#endif - } else { - if (un->un_flags & UN_LOCKED) { - vrele(UNIONTOV(un)); - un->un_flags |= UN_WANT; - (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); - goto loop; - } - un->un_flags |= UN_LOCKED; + if (dvp && un->un_vnode != dvp) { + struct vnode *scan = un->un_vnode; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + do { + scan = VTOUNION(scan)->un_pvp; + } while (scan && scan->v_tag == VT_UNION && scan != dvp); + if (scan != dvp) { + /* + * our new un is above dvp (we never saw dvp + * while moving up the tree). + */ + VREF(dvp); + VOP_UNLOCK(dvp, 0, p); + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(dvp); + } else { + /* + * our new un is under dvp + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } + } else if (dvp == NULLVP) { + /* + * dvp is NULL, we need to lock un. + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } else { + /* + * dvp == un->un_vnode, we are already locked. + */ + error = 0; } - /* - * At this point, the union_node is locked, - * un->un_uppervp may not be locked, and uppervp - * is locked or nil. - */ + if (error) + goto loop; /* - * Save information about the upper layer. + * At this point, the union_node is locked and referenced. + * + * uppervp is locked and referenced or NULL, lowervp is + * referenced or NULL. */ + UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", + un, un->un_vnode, un->un_uppervp, + (un->un_uppervp ? un->un_uppervp->v_usecount : -99), + uppervp, + (uppervp ? uppervp->v_usecount : -99) + )); + if (uppervp != un->un_uppervp) { + KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); union_newupper(un, uppervp); } else if (uppervp) { + KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); vrele(uppervp); } - if (un->un_uppervp) { - un->un_flags |= UN_ULOCK; - un->un_flags &= ~UN_KLOCK; - } - /* * Save information about the lower layer. 
* This needs to keep track of pathname @@ -456,12 +506,22 @@ loop: bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } + + /* + * and upperdvp + */ + if (upperdvp != un->un_dirvp) { + if (un->un_dirvp) + vrele(un->un_dirvp); + un->un_dirvp = upperdvp; + } else if (upperdvp) { + vrele(upperdvp); + } + *vpp = UNIONTOV(un); return (0); } @@ -477,17 +537,22 @@ loop: goto loop; } + /* + * Create new node rather than replace old node + */ + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { - if (uppervp) { - if (dvp == uppervp) - vrele(uppervp); - else - vput(uppervp); - } + /* + * If an error occurs, clear out vnodes. + */ if (lowervp) vrele(lowervp); - + if (uppervp) + vrele(uppervp); + if (upperdvp) + vrele(upperdvp); + *vpp = NULL; goto out; } @@ -499,37 +564,34 @@ loop: (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; + un = VTOUNION(*vpp); + bzero(un, sizeof(*un)); + + lockinit(&un->un_lock, PVFS, "unlock", 0, 0); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; - un->un_pvp = undvp; - if (undvp != NULLVP) - VREF(undvp); + un->un_dirvp = upperdvp; + un->un_pvp = dvp; /* only parent dir in new allocation */ + if (dvp != NULLVP) + VREF(dvp); un->un_dircache = 0; un->un_openl = 0; - un->un_flags = UN_LOCKED; - if (un->un_uppervp) - un->un_flags |= UN_ULOCK; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; - un->un_dirvp = 0; + un->un_dirvp = NULL; } if (docache) { @@ -537,10 +599,10 @@ loop: un->un_flags |= UN_CACHED; } +out: if (xlowervp) vrele(xlowervp); -out: if (docache) union_list_unlock(hash); @@ -558,16 +620,26 @@ union_freevp(vp) LIST_REMOVE(un, un_cache); } - if (un->un_pvp != NULLVP) + if (un->un_pvp != NULLVP) { vrele(un->un_pvp); - if (un->un_uppervp != NULLVP) + un->un_pvp = NULL; + } + if (un->un_uppervp != NULLVP) { vrele(un->un_uppervp); - if (un->un_lowervp != NULLVP) + un->un_uppervp = NULL; + } + if (un->un_lowervp != NULLVP) { vrele(un->un_lowervp); - if (un->un_dirvp != NULLVP) + un->un_lowervp = NULL; + } + if (un->un_dirvp != NULLVP) { vrele(un->un_dirvp); - if (un->un_path) + un->un_dirvp = NULL; + } + if (un->un_path) { free(un->un_path, M_TEMP); + un->un_path = NULL; + } FREE(vp->v_data, M_TEMP); vp->v_data = 0; @@ -579,6 +651,9 @@ union_freevp(vp) * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. + * + * fvp and tvp are both exclusively locked on call, but their refcounts + * haven't been bumped at all. */ static int union_copyfile(fvp, tvp, cred, p) @@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p) * give up at the first sign of trouble. 
*/ + bzero(&uio, sizeof(uio)); + uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; - VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; + int count; + int bufoffset; + /* + * Setup for big read + */ uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; - error = VOP_READ(fvp, &uio, 0, cred); - if (error == 0) { + if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) + break; + + /* + * Get bytes read, handle read eof case and setup for + * write loop + */ + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + bufoffset = 0; + + /* + * Write until an error occurs or our buffer has been + * exhausted, then update the offset for the next read. + */ + while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXBSIZE - uio.uio_resid; - uio.uio_offset = offset; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; - if (uio.uio_resid == 0) + if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) break; - - do { - error = VOP_WRITE(tvp, &uio, 0, cred); - } while ((uio.uio_resid > 0) && (error == 0)); + bufoffset += (count - bufoffset) - uio.uio_resid; } - + uio.uio_offset = offset + bufoffset; } while (error == 0); free(buf, M_TEMP); @@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p) } /* - * (un) is assumed to be locked on entry and remains - * locked on exit. + * + * un's vnode is assumed to be locked on entry and remains locked on exit. 
*/ + int union_copyup(un, docopy, cred, p) struct union_node *un; @@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p) if (error) return (error); - /* at this point, uppervp is locked */ - union_newupper(un, uvp); - un->un_flags |= UN_ULOCK; - lvp = un->un_lowervp; + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); if (docopy) { /* * XX - should not ignore errors @@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p) */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, cred); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } -#ifdef DEBUG if (error == 0) - uprintf("union: copied up %s\n", un->un_path); -#endif + UDEBUG(("union: copied up %s\n", un->un_path)); } - un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); + union_newupper(un, uvp); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); union_vn_close(uvp, FWRITE, cred, p); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; - + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the @@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p) (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } + if (vn_canvmio(uvp) == TRUE) + error = vfs_object_create(uvp, p, cred); un->un_openl = 0; } @@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p) } +/* + * union_relookup: + * + * dvp should be locked on entry and will be locked on return. No + * net change in the ref count will occur. + * + * If an error is returned, *vpp will be invalid, otherwise it + * will hold a locked, referenced vnode. If *vpp == dvp then + * remember that only one exclusive lock is held. + */ + static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; @@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; @@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_consume = cnp->cn_consume; VREF(dvp); - error = relookup(dvp, vpp, cn); - if (!error) - vrele(dvp); - else { + VOP_UNLOCK(dvp, 0, cnp->cn_proc); + + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + + if ((error = relookup(dvp, vpp, cn)) != 0) { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = NULL; + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); + return(error); } - return (error); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + * + * We want to return with dvp as it was passed to us, so we get + * rid of our reference. + */ + vrele(dvp); + return (0); } /* @@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) * * (um) points to the union mount structure for access to the * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory. - * it is unlocked on entry and exit. 
+ (dvp) is the directory in which to create the shadow directory, + it is locked (but not ref'd) on entry and return. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which - * is returned locked. + * is returned locked and ref'd */ int union_mkshadow(um, dvp, cnp, vpp) @@ -810,8 +924,10 @@ if (*vpp) { VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp, 0, p); - vrele(*vpp); + if (dvp == *vpp) + vrele(*vpp); + else + vput(*vpp); *vpp = NULLVP; return (EEXIST); } @@ -832,7 +948,7 @@ VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); - vput(dvp); + /*vput(dvp);*/ return (error); } @@ -842,7 +958,7 @@ * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. - * it is locked on entry and exit. + * it is locked on entry and return. * (cnp) is the componentname to be created. */ int @@ -857,17 +973,16 @@ struct vnode *wvp; struct componentname cn; - VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) return (error); - } if (wvp) { VOP_ABORTOP(dvp, &cn); - vrele(dvp); - vrele(wvp); + if (wvp == dvp) + vrele(wvp); + else + vput(wvp); return (EEXIST); } @@ -877,9 +992,6 @@ error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); - - vrele(dvp); - return (error); } /* @@ -890,6 +1002,12 @@ * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. + * + * On entry, the vnode associated with un is locked. It remains locked + * on return. + * + * If no error occurs, *vpp contains a locked referenced vnode for your + * use. If an error occurs *vpp is undefined. */ static int union_vn_create(vpp, un, p) @@ -921,26 +1039,34 @@ cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; - cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); - vrele(un->un_dirvp); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + */
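A sketch of the lock/reference states around the relookup() call here, restating the contract documented above and in union_relookup():

	/*
	 *		un->un_dirvp (dvp)		vp
	 * entry:	referenced, unlocked		-
	 * error:	reference dropped, unlocked	-
	 * success:	locked, reference kept		locked and referenced;
	 *						may equal dvp, in which
	 *						case only one exclusive
	 *						lock is held (so vrele,
	 *						not vput, the second one)
	 */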
if (vp) { + vput(un->un_dirvp); VOP_ABORTOP(un->un_dirvp, &cn); - if (un->un_dirvp == vp) - vrele(un->un_dirvp); + if (vp == un->un_dirvp) + vrele(vp); else - vput(un->un_dirvp); - vrele(vp); + vput(vp); return (EEXIST); } @@ -964,11 +1090,12 @@ return (error); error = VOP_OPEN(vp, fmode, cred, p); + if (error == 0 && vn_canvmio(vp) == TRUE) + error = vfs_object_create(vp, p, cred); if (error) { vput(vp); return (error); } - vp->v_writecount++; *vpp = vp; return (0); @@ -987,6 +1114,14 @@ return (VOP_CLOSE(vp, fmode, cred, p)); } +#if 0 + +/* + * union_removed_upper: + * + * called with union_node unlocked. XXX + */ + void union_removed_upper(un) struct union_node *un; @@ -999,9 +1134,7 @@ * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referenced. */ -#if 0 union_newupper(un, NULLVP); -#endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); @@ -1013,28 +1146,8 @@ un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } - - if (un->un_flags & UN_ULOCK) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); - } } -#if 0 -struct vnode * -union_lowervp(vp) - struct vnode *vp; -{ - struct union_node *un = VTOUNION(vp); - - if ((un->un_lowervp != NULLVP) && - (vp->v_type == un->un_lowervp->v_type)) { - if (vget(un->un_lowervp, 0) == 0) - return (un->un_lowervp); - } - - return (NULLVP); -} #endif /* @@ -1104,13 +1217,12 @@ union_dircache(vp, p) nvp = NULLVP; - if (dircache == 0) { + if (dircache == NULL) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; - dircache = (struct vnode **) - malloc(cnt * sizeof(struct vnode *), - M_TEMP, M_WAITOK); + dircache = malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; @@ -1126,9 +1238,11 @@ if (*vpp == NULLVP) goto out; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ + UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); VREF(*vpp); - error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); + UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); if (error) goto out; @@ -1141,6 +1255,40 @@ out: return (nvp); } +/* + * Guarantee coherency with the VM cache by invalidating any clean VM pages + * associated with this write and updating any dirty VM pages. Since our + * vnode is locked, other processes will not be able to read the pages in + * again until after our write completes. + * + * We also have to be coherent with reads, by flushing any pending dirty + * pages prior to issuing the read. + * + * XXX this is somewhat of a hack at the moment. To support this properly + * we would have to be able to run VOP_READ and VOP_WRITE through the VM + * cache. Then we wouldn't need to worry about coherency. + */
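A worked example of the page-range arithmetic in union_vm_coherency() below, assuming PAGE_SIZE is 4096:

	/*
	 * uio_offset = 6000, uio_resid = 10000: the transfer spans
	 * bytes 6000..15999.
	 *
	 *	pgoff  = 6000 & PAGE_MASK                  = 1904
	 *	pstart = 6000 / PAGE_SIZE                  = 1
	 *	pend   = 1 + (10000 + 1904 + 4095) / 4096  = 4
	 *
	 * The half-open page range [1, 4) covers bytes 4096..16383 and
	 * fully encloses the transfer.
	 */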
+ +void +union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls) +{ + vm_object_t object; + vm_pindex_t pstart; + vm_pindex_t pend; + int pgoff; + + if ((object = vp->v_object) == NULL) + return; + + pgoff = uio->uio_offset & PAGE_MASK; + pstart = uio->uio_offset / PAGE_SIZE; + pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE; + + vm_object_page_clean(object, pstart, pend, OBJPC_SYNC); + if (cleanfls) + vm_object_page_remove(object, pstart, pend, TRUE); +} + /* * Module glue to remove #ifdef UNION from vfs_syscalls.c */ @@ -1169,6 +1317,8 @@ if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, fp->f_cred); if (error) { vput(lvp); return (error); } @@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data) } return 0; } + static moduledata_t union_mod = { "union_dircheck", union_modevent, NULL }; + DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c index af828ac64a8c..1a53f88bcc7b 100644 --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p) int len; u_int size; -#ifdef DEBUG - printf("union_mount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); /* * Disable clustered write, otherwise system becomes unstable. @@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p) if (error) goto bad; + /* + * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. + * We need to reference it but not lock it. + */ + lowerrootvp = mp->mnt_vnodecovered; VREF(lowerrootvp); +#if 0 /* * Unlock lower node to avoid deadlock. */ if (lowerrootvp->v_op == union_vnodeop_p) VOP_UNLOCK(lowerrootvp, 0, p); +#endif /* - * Find upper node. + * Obtain upper vnode by calling namei() on the path. The + * upperrootvp will be returned referenced but not locked. */ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, UIO_USERSPACE, args.target, p); error = namei(ndp); + +#if 0 if (lowerrootvp->v_op == union_vnodeop_p) vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); +#endif if (error) goto bad; @@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p) vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; + UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp))); + /* * Check multi union mount to avoid `lock myself again' panic. + * Also require that it be a directory. */ if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { #ifdef DIAGNOSTIC @@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p) goto bad; } - um = (struct union_mount *) malloc(sizeof(struct union_mount), - M_UNIONFSMNT, M_WAITOK); /* XXX */ - /* - * Keep a held reference to the target vnodes. - * They are vrele'd in union_unmount. - * - * Depending on the _BELOW flag, the filesystems are - * viewed in a different order. In effect, this is the - * same as providing a mount under option to the mount syscall. + * Allocate our union_mount structure and populate the fields. + * The vnode references are stored in the union_mount as held, + * unlocked references. Depending on the _BELOW flag, the + * filesystems are viewed in a different order. In effect this + * is the same as providing a mount-under option to the mount + * syscall. 
*/ + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); + + bzero(um, sizeof(struct union_mount)); + um->um_op = args.mntflags & UNMNT_OPMASK; + switch (um->um_op) { case UNMNT_ABOVE: um->um_lowervp = lowerrootvp; um->um_uppervp = upperrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_BELOW: um->um_lowervp = upperrootvp; um->um_uppervp = lowerrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_REPLACE: vrele(lowerrootvp); - lowerrootvp = NULLVP; + lowerrootvp = NULL; um->um_uppervp = upperrootvp; um->um_lowervp = lowerrootvp; + upperrootvp = NULL; break; default: @@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p) * supports whiteout operations */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); if (error) goto bad; } @@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p) (void)union_statfs(mp, &mp->mnt_stat, p); -#ifdef DEBUG - printf("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); -#endif + UDEBUG(("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); return (0); bad: - if (um) + if (um) { + if (um->um_uppervp) + vrele(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + /* XXX other fields */ free(um, M_UNIONFSMNT); + } if (cred) crfree(cred); if (upperrootvp) @@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p) int freeing; int flags = 0; -#ifdef DEBUG - printf("union_unmount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; @@ -365,55 +387,25 @@ union_root(mp, vpp) struct mount *mp; struct vnode **vpp; { - struct proc *p = curproc; /* XXX */ struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; - int loselock; - int lockadj = 0; - - if (um->um_lowervp && um->um_op != UNMNT_BELOW && - VOP_ISLOCKED(um->um_lowervp)) { - VREF(um->um_lowervp); - VOP_UNLOCK(um->um_lowervp, 0, p); - lockadj = 1; - } /* - * Return locked reference to root. + * Supply an unlocked reference to um_uppervp and to um_lowervp. It + * is possible for um_uppervp to be locked without the associated + * root union_node being locked. We let union_allocvp() deal with + * it. 
*/ + UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); + VREF(um->um_uppervp); - if ((um->um_op == UNMNT_BELOW) && - VOP_ISLOCKED(um->um_uppervp)) { - loselock = 1; - } else { - vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - loselock = 0; - } if (um->um_lowervp) VREF(um->um_lowervp); - error = union_allocvp(vpp, mp, - (struct vnode *) 0, - (struct vnode *) 0, - (struct componentname *) 0, - um->um_uppervp, - um->um_lowervp, - 1); - if (error) { - if (loselock) - vrele(um->um_uppervp); - else - vput(um->um_uppervp); - if (um->um_lowervp) - vrele(um->um_lowervp); - } else { - if (loselock) - VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; - } - if (lockadj) { - vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p); - vrele(um->um_lowervp); - } + error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, + um->um_uppervp, um->um_lowervp, 1); + UDEBUG(("error %d\n", error)); + UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); return (error); } @@ -429,10 +421,8 @@ union_statfs(mp, sbp, p) struct statfs mstat; int lbsize; -#ifdef DEBUG - printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", - (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp); -#endif + UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); bzero(&mstat, sizeof(mstat)); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 145f8ca6f0ad..128e59ebaa21 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -50,13 +50,25 @@ #include #include #include +#include #include -#define FIXUP(un, p) { \ - if (((un)->un_flags & UN_ULOCK) == 0) { \ - union_fixup(un, p); \ - } \ -} +#include +#include + +#include +#include +#include +#include +#include + +int uniondebug = 0; + +#if UDEBUG_ENABLED +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#else +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#endif static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); @@ -64,17 +76,15 @@ static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); -static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); -static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); -static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); @@ -94,36 +104,89 @@ static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_getpages __P((struct 
vop_getpages_args *ap)); +static int union_putpages __P((struct vop_putpages_args *ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); -static void -union_fixup(un, p) - struct union_node *un; - struct proc *p; +static __inline +struct vnode * +union_lock_upper(struct union_node *un, struct proc *p) { + struct vnode *uppervp; - vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + if ((uppervp = un->un_uppervp) != NULL) { + VREF(uppervp); + vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0")); + return(uppervp); } +static __inline +void +union_unlock_upper(struct vnode *uppervp, struct proc *p) +{ + vput(uppervp); +} + +static __inline +struct vnode * +union_lock_other(struct union_node *un, struct proc *p) +{ + struct vnode *vp; + + if (un->un_uppervp != NULL) { + vp = union_lock_upper(un, p); + } else if ((vp = un->un_lowervp) != NULL) { + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + return(vp); +} + +static __inline +void +union_unlock_other(struct vnode *vp, struct proc *p) +{ + vput(vp); +}
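These inlines replace the old FIXUP() scheme with a uniform convention: the caller gets back a referenced, exclusively locked vnode (or NULL) and releases both with one call. A typical caller, modeled on the new union_whiteout() and union_create() below; VOP_SOMETHING stands in for any upper-layer operation and is hypothetical:

	struct vnode *uvp;
	int error = EROFS;

	if ((uvp = union_lock_upper(un, p)) != NULLVP) {
		error = VOP_SOMETHING(uvp, ...);	/* hypothetical VOP */
		union_unlock_upper(uvp, p);		/* vput(): unlock + vrele */
	}
	return (error);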
+ +/* + * union_lookup: + * + * udvp must be exclusively locked on call and will remain + * exclusively locked on return. This is the mount point + * for our filesystem. + * + * dvp Our base directory, locked and referenced. + * The passed dvp will be dereferenced and unlocked on return + * and a new dvp will be returned which is locked and + * referenced in the same variable. + * + * vpp is filled in with the result if no error occurred, + * locked and ref'd. + * + * If an error is returned, *vpp is set to NULLVP. If no + * error occurs, *vpp is returned with a reference and an + * exclusive lock. + */ + static int -union_lookup1(udvp, dvpp, vpp, cnp) +union_lookup1(udvp, pdvp, vpp, cnp) struct vnode *udvp; - struct vnode **dvpp; + struct vnode **pdvp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; + struct vnode *dvp = *pdvp; struct vnode *tdvp; - struct vnode *dvp; struct mount *mp; - dvp = *dvpp; - /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what @@ -139,49 +202,79 @@ * filesystems. */ tdvp = dvp; - *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; - vput(tdvp); + dvp = dvp->v_mount->mnt_vnodecovered; VREF(dvp); + vput(tdvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } + /* + * Set return dvp to be the upperdvp parent directory. + */ + *pdvp = dvp; + + /* + * If the VOP_LOOKUP call generates an error, tdvp is invalid and no + * changes will have been made to dvp, so we are set to return. + */ + error = VOP_LOOKUP(dvp, &tdvp, cnp); - if (error) + if (error) { + UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); + *vpp = NULL; return (error); + } /* * The parent directory will have been unlocked, unless lookup - * found the last component. In which case, re-lock the node - * here to allow it to be unlocked again (phew) in union_lookup. + * found the last component or if dvp == tdvp (tdvp must be locked). + * + * We want our dvp to remain locked and ref'd. We also want tdvp + * to remain locked and ref'd. */ - if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); - dvp = tdvp; + if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). + * + * We use dvp as a temporary variable here, it is no longer related + * to the dvp above. However, we have to ensure that both *pdvp and + * tdvp are locked on return. */ - while (dvp != udvp && (dvp->v_type == VDIR) && - (mp = dvp->v_mountedhere)) { + + dvp = tdvp; + while ( + dvp != udvp && + (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere) + ) { + int relock_pdvp = 0; if (vfs_busy(mp, 0, 0, p)) continue; - error = VFS_ROOT(mp, &tdvp); + if (dvp == *pdvp) + relock_pdvp = 1; + vput(dvp); + dvp = NULL; + error = VFS_ROOT(mp, &dvp); + vfs_unbusy(mp, p); + + if (relock_pdvp) + vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) { - vput(dvp); + *vpp = NULL; return (error); } - - vput(dvp); - dvp = tdvp; } - *vpp = dvp; return (0); } @@ -199,8 +292,8 @@ union_lookup(ap) int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; - struct vnode *dvp = ap->a_dvp; - struct union_node *dun = VTOUNION(dvp); + struct vnode *dvp = ap->a_dvp; /* starting dir */ + struct union_node *dun = VTOUNION(dvp); /* associated union node */ struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; @@ -209,44 +302,38 @@ union_lookup(ap) int iswhiteout; struct vattr va; + *ap->a_vpp = NULLVP; /* * Disallow write attempts to the filesystem mounted read-only. */ - if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + if ((cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { return (EROFS); - -#ifdef notyet - if (cnp->cn_namelen == 3 && - cnp->cn_nameptr[2] == '.' && - cnp->cn_nameptr[1] == '.' && - cnp->cn_nameptr[0] == '.') { - dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); - if (dvp == NULLVP) - return (ENOENT); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp, 0, p); - return (0); } -#endif + /* + * For any lookups we do, always return with the parent locked + */ cnp->cn_flags |= LOCKPARENT; - upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - VREF(upperdvp); - if (lowerdvp != NULL) - VREF(lowerdvp); - } + uerror = ENOENT; + lerror = ENOENT; + + /* + * Get a private lock on uppervp and a reference, effectively + * taking it out of the union_node's control. + * + * We must lock upperdvp while holding our lock on dvp + * to avoid a deadlock. + */ + upperdvp = union_lock_upper(dun, p); /* * do the lookup in the upper level. @@ -255,62 +342,64 @@ union_lookup(ap) * on and just return that vnode. */ if (upperdvp != NULLVP) { - FIXUP(dun, p); /* - * If we're doing `..' 
in the underlying filesystem, - * we must drop our lock on the union node before - * going up the tree in the lower file system--if we block - * on the lowervp lock, and that's held by someone else - * coming down the tree and who's waiting for our lock, - * we would be hosed. + * We do not have to worry about the DOTDOT case, we've + * already unlocked dvp. */ - if (cnp->cn_flags & ISDOTDOT) { - /* retain lock on underlying VP: */ - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(dvp, 0, p); - } - uerror = union_lookup1(um->um_uppervp, &upperdvp, - &uppervp, cnp); + UDEBUG(("A %p\n", upperdvp)); + + /* + * Do the lookup. We must supply a locked and referenced + * upperdvp to the function and will get a new locked and + * referenced upperdvp back with the old having been + * dereferenced. + * + * If an error is returned, uppervp will be NULLVP. If no + * error occurs, uppervp will be the locked and referenced + * return vnode or possibly NULL, depending on what is being + * requested. It is possible that the returned uppervp + * will be the same as upperdvp. + */ + uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); + UDEBUG(( + "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", + uerror, + upperdvp, + upperdvp->v_usecount, + VOP_ISLOCKED(upperdvp), + uppervp, + (uppervp ? uppervp->v_usecount : -99), + (uppervp ? VOP_ISLOCKED(uppervp) : -99) + )); + /* * Disallow write attempts to the filesystem mounted read-only. */ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - return (EROFS); - } - - if (cnp->cn_flags & ISDOTDOT) { - if (dun->un_uppervp == upperdvp) { - /* - * We got the underlying bugger back locked... - * now take back the union node lock. Since we - * hold the uppervp lock, we can diddle union - * locking flags at will. :) - */ - dun->un_flags |= UN_ULOCK; - } - /* - * If upperdvp got swapped out, it means we did - * some mount point magic, and we do not have - * dun->un_uppervp locked currently--so we get it - * locked here (don't set the UN_ULOCK flag). - */ - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - } - - /*if (uppervp == upperdvp) - dun->un_flags |= UN_KLOCK;*/ - - if (cnp->cn_consume != 0) { - *ap->a_vpp = uppervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = uerror; + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + error = EROFS; goto out; } + + /* + * Special case. If cn_consume != 0 skip out. The result + * of the lookup is transferred to our return variable. If + * an error occurred we have to throw away the results. + */ + + if (cnp->cn_consume != 0) { + if ((error = uerror) == 0) { + *ap->a_vpp = uppervp; + uppervp = NULL; + } + goto out; + } + + /* + * Calculate whiteout, fall through + */ + if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; @@ -321,8 +410,6 @@ union_lookup(ap) iswhiteout = 1; } } - } else { - uerror = ENOENT; } /* @@ -332,13 +419,14 @@ union_lookup(ap) * back from the upper layer and return the lower vnode * instead. */ + if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("B %p\n", lowerdvp)); /* - * Only do a LOOKUP on the bottom node, since + * Force only LOOKUPs on the lower node, since * we won't be making changes to it anyway. 
*/ nameiop = cnp->cn_nameiop; cnp->cn_nameiop = LOOKUP; if (um->um_op == UNMNT_BELOW) { saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } + /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. + * + * union_lookup1() requires lowervp to be locked on entry, + * and it will be unlocked on return. The ref count will + * not change. On return lowervp doesn't represent anything + * to us so we NULL it out. */ - lerror = union_lookup1(um->um_lowervp, &lowerdvp, - &lowervp, cnp); + VREF(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); + if (lowerdvp == lowervp) + vrele(lowerdvp); + else + vput(lowerdvp); + lowerdvp = NULL; /* lowerdvp invalid after vput */ + if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; - if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp, 0, p); - if (cnp->cn_consume != 0 || lerror == EACCES) { - if (lerror == EACCES) - lowervp = NULLVP; - if (uppervp != NULLVP) { - if (uppervp == upperdvp) - vrele(uppervp); - else - vput(uppervp); - uppervp = NULLVP; + if ((error = lerror) == 0) { + *ap->a_vpp = lowervp; + lowervp = NULL; } - *ap->a_vpp = lowervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = lerror; goto out; } } else { - lerror = ENOENT; + UDEBUG(("C %p\n", lowerdvp)); if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { - lowervp = LOWERVP(dun->un_pvp); - if (lowervp != NULLVP) { + if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; @@ -390,35 +478,27 @@ union_lookup(ap) } } - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - /* - * at this point, we have uerror and lerror indicating - * possible errors with the lookups in the upper and lower - * layers. additionally, uppervp and lowervp are (locked) - * references to existing vnodes in the upper and lower layers. + * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. * - * there are now three cases to consider. - * 1. if both layers returned an error, then return whatever - * error the upper layer generated. + * 1. If both layers returned an error, select the upper layer. * - * 2. if the top layer failed and the bottom layer succeeded - * then two subcases occur. - * a. the bottom vnode is not a directory, in which - * case just return a new union vnode referencing - * an empty top layer and the existing bottom layer. - * b. the bottom vnode is a directory, in which case - * create a new directory in the top-level and - * continue as in case 3. + * 2. If the upper layer failed and the bottom layer succeeded, + * two subcases occur: * - * 3. if the top layer succeeded then return a new union + * a. The bottom vnode is not a directory, in which case + * just return a new union vnode referencing an + * empty top layer and the existing bottom layer. + * + * b. The bottom vnode is a directory, in which case + * create a new directory in the top layer and + * fall through to case 3. + * + * 3. If the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ - *ap->a_vpp = NULLVP; - /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; goto out; } /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. 
*/ - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp, 0, p); + KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); + /* + * oops, uppervp has a problem, we may have to shadow. + */ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); - dun->un_flags |= UN_ULOCK; - if (uerror) { - if (lowervp != NULLVP) { - vput(lowervp); - lowervp = NULLVP; - } error = uerror; goto out; } } } - if (lowervp != NULLVP) + /* + * Must call union_allocvp with both the upper and lower vnodes + * referenced and the upper vnode locked. ap->a_vpp is returned + * referenced and locked. lowervp, uppervp, and upperdvp are + * absorbed by union_allocvp() whether it succeeds or fails. + * + * upperdvp is the parent directory of uppervp which may be + * different, depending on the path, from dun->un_uppervp. That's + * why it is a separate argument. Note that it must be unlocked. + * + * dvp must be locked on entry to the call and will be locked on + * return. + */ + + if (uppervp && uppervp != upperdvp) + VOP_UNLOCK(uppervp, 0, p); + if (lowervp) VOP_UNLOCK(lowervp, 0, p); + if (upperdvp) + VOP_UNLOCK(upperdvp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); - if (error) { - if (uppervp != NULLVP) - vput(uppervp); - if (lowervp != NULLVP) - vrele(lowervp); - } else { - if (*ap->a_vpp != dvp) - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); -#ifdef DIAGNOSTIC - if (cnp->cn_namelen == 1 && - cnp->cn_nameptr[0] == '.' && - *ap->a_vpp != dvp) { - panic("union_lookup returning . (%p) not same as startdir (%p)", - ap->a_vpp, dvp); - } -#endif - } + UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99)); + + uppervp = NULL; + upperdvp = NULL; + lowervp = NULL; + + /* + * Termination Code + * + * - put away any extra junk lying around. Note that lowervp + * (if not NULL) will never be the same as *ap->a_vp and + * neither will uppervp, because when we set that state we + * NULL-out lowervp or uppervp. On the other hand, upperdvp + * may match uppervp or *ap->a_vpp. + * + * - relock/unlock dvp if appropriate. + */ out: + if (upperdvp) { + if (upperdvp == uppervp || upperdvp == *ap->a_vpp) + vrele(upperdvp); + else + vput(upperdvp); + } + + if (uppervp) + vput(uppervp); + + if (lowervp) + vput(lowervp); + + /* + * Restore LOCKPARENT state + */ + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, + ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99), + lowervp, uppervp)); + + /* + * dvp lock state, determine whether to relock dvp. dvp is expected + * to be locked on return if: + * + * - there was an error (other than EJUSTRETURN), or + * - we hit the last component and lockparent is true + * + * dvp_is_locked is the current state of the dvp lock, not counting + * the possibility that *ap->a_vpp == dvp (in which case it is locked + * anyway). Note that *ap->a_vpp == dvp only if no error occurred. + */ + + if (*ap->a_vpp != dvp) { + if ((error == 0 || error == EJUSTRETURN) && + (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { + VOP_UNLOCK(dvp, 0, p); + } + } + + /* + * Diagnostics + */ + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); + } +#endif return (error); }
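The relock rules implemented above can be condensed into a table (a restatement of the same logic, not new behavior):

	/*
	 * dvp lock state as union_lookup() returns:
	 *
	 *	*ap->a_vpp == dvp			locked (same vnode)
	 *	error other than EJUSTRETURN		locked
	 *	success or EJUSTRETURN,
	 *	    lockparent and ISLASTCN		locked
	 *	success or EJUSTRETURN, otherwise	unlocked
	 */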
+/* + * union_create: + * + * a_dvp is locked on entry and remains locked on return. a_vpp is returned + * locked if no error occurs, otherwise it is garbage. + */ + static int union_create(ap) struct vop_create_args /* { @@ -491,36 +638,27 @@ union_create(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULL) { struct vnode *vp; struct mount *mp; - int error; - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + if (error == 0) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, - NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } static int @@ -533,15 +671,23 @@ union_whiteout(ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + int error = EOPNOTSUPP; - if (un->un_uppervp == NULLVP) - return (EOPNOTSUPP); - - FIXUP(un, p); - return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); + if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) { + error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); + union_unlock_upper(uppervp, cnp->cn_proc); + } + return(error); } +/* + * union_mknod: + * + * a_dvp is locked on entry and should remain locked on return. + * a_vpp is garbage whether an error occurs or not. + */ + static int union_mknod(ap) struct vop_mknod_args /* { @@ -552,42 +698,28 @@ union_mknod(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) { struct vnode *vp; - struct mount *mp; - int error; - - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); - } - - if (vp != NULLVP) { - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, - cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - } else { - dun->un_flags |= UN_ULOCK; - } - return (error); + /* vp is garbage whether an error occurs or not */ + union_unlock_upper(dvp, cnp->cn_proc); } - - return (EROFS); + return (error); } +/* + * union_open: + * + * run open VOP. When opening the underlying vnode we have to mimic + * vn_open. What we *really* need to do to avoid screwups if the + * open semantics change is to call vn_open(). For example, ufs blows + * up if you open a file but do not vmio it prior to writing. + */
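The vn_open() mimicry mentioned above shows up as a two-step idiom at every open-like site in this patch (union_open(), union_copyup(), union_vn_create(), union_dircheck()): once the open succeeds, a VM object is attached to any VMIO-capable vnode so later writes do not trip the UFS problem described in the comment:

	error = VOP_OPEN(vp, mode, cred, p);
	if (error == 0 && vn_canvmio(vp) == TRUE)
		error = vfs_object_create(vp, p, cred);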
+ */ + static int union_open(ap) struct vop_open_args /* { @@ -603,13 +735,18 @@ union_open(ap) int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; - int error; + int error = 0; + int tvpisupper = 1; /* * If there is an existing upper vp then simply open that. + * The upper vp takes precedence over the lower vp. When opening + * a lower vp for writing, copy it to the uppervp and then open the + * uppervp. + * + * At the end of this section tvp will be left locked. */ - tvp = un->un_uppervp; - if (tvp == NULLVP) { + if ((tvp = union_lock_upper(un, p)) == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { - error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); - if (error == 0) - error = VOP_OPEN(un->un_uppervp, mode, cred, p); - return (error); + int docopy = !(mode & O_TRUNC); + error = union_copyup(un, docopy, cred, p); + tvp = union_lock_upper(un, p); + } else { + un->un_openl++; + VREF(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + tvpisupper = 0; } - - /* - * Just open the lower vnode - */ - un->un_openl++; - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp, 0, p); - - return (error); } - FIXUP(un, p); + /* + * We are holding the correct vnode, open it + */ - error = VOP_OPEN(tvp, mode, cred, p); + if (error == 0) + error = VOP_OPEN(tvp, mode, cred, p); + /* + * Absolutely necessary or UFS will blow up + */ + if (error == 0 && vn_canvmio(tvp) == TRUE) { + error = vfs_object_create(tvp, p, cred); + } + + /* + * Release any locks held + */ + if (tvpisupper) { + if (tvp) + union_unlock_upper(tvp, p); + } else { + vput(tvp); + } return (error); } +/* + * union_close: + * + * It is unclear whether a_vp is passed locked or unlocked. Whatever + * the case we do not change it. + */ + static int union_close(ap) struct vop_close_args /* { @@ -661,7 +818,6 @@ union_close(ap) --un->un_openl; vp = un->un_lowervp; } - ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } @@ -688,12 +844,12 @@ union_access(ap) struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; - struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. */ - if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + if ((ap->a_mode & VWRITE) && + (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: @@ -703,19 +859,30 @@ union_access(ap) break; } } - if ((vp = un->un_uppervp) != NULLVP) { - FIXUP(un, p); + + if ((vp = union_lock_upper(un, p)) != NULLVP) { ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_access), ap)); + error = VCALL(vp, VOFFSET(vop_access), ap); + union_unlock_upper(vp, p); + return(error); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - savedvp = ap->a_vp; ap->a_vp = vp; + + /* + * Remove VWRITE from a_mode if our mount point is RW, because + * we want to allow writes and lowervp may be read-only. 
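+ *
+ * Illustration (hypothetical caller): on a read-write union over a
+ * read-only lower layer,
+ *
+ *	error = VOP_ACCESS(unionvp, VWRITE, cred, p);
+ *
+ * should succeed for a lower-only file, because an eventual write
+ * will copy the file up to the writable upper layer rather than
+ * touch the read-only lowervp.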
+ */ + if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) + ap->a_mode &= ~VWRITE; + error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { - struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); + struct union_mount *um; + + um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; @@ -723,17 +890,26 @@ union_access(ap) } } VOP_UNLOCK(vp, 0, p); - if (error) - return (error); } - - return (error); + return(error); } /* * We handle getattr only to change the fsid and * track object sizes + * + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * + * Because of this we cannot use our normal locking functions + * if we do not intend to lock the main a_vp node. At the moment + * we are running without any specific locking at all, but any + * programmer should beware that care must be taken if locking + * is added to this function. */ + static int union_getattr(ap) struct vop_getattr_args /* { @@ -745,12 +921,10 @@ union_getattr(ap) { int error; struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *vp = un->un_uppervp; - struct proc *p = ap->a_p; + struct vnode *vp; struct vattr *vap; struct vattr va; - /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. @@ -762,22 +936,11 @@ union_getattr(ap) vap = ap->a_vap; - vp = un->un_uppervp; - if (vp != NULLVP) { - /* - * It's not clear whether VOP_GETATTR is to be - * called with the vnode locked or not. stat() calls - * it with (vp) locked, and fstat calls it with - * (vp) unlocked. - * In the mean time, compensate here by checking - * the union_node's lock flag. - */ - if (un->un_flags & UN_LOCKED) - FIXUP(un, p); - + if ((vp = un->un_uppervp) != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, vap->va_size, VNOVAL); } @@ -794,12 +957,12 @@ union_getattr(ap) error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; - return (0); } @@ -815,27 +978,28 @@ union_setattr(ap) struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; + struct vnode *uppervp; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && - (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_mode != (mode_t)VNOVAL)) { return (EROFS); + } /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. 
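 *
 * Illustration (hypothetical caller): open("lowerfile", O_WRONLY |
 * O_TRUNC) on a lower-only regular file arrives here with
 * va_size == 0, so the copyup below effectively runs as
 *
 *	error = union_copyup(un, 0, cred, p);
 *
 * creating an empty upper shadow file without copying any data.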
*/ - if ((un->un_uppervp == NULLVP) && - /* assert(un->un_lowervp != NULLVP) */ - (un->un_lowervp->v_type == VREG)) { + if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_p); + ap->a_cred, ap->a_p); if (error) return (error); } @@ -844,19 +1008,45 @@ * Try to set attributes in upper layer, * otherwise return read-only filesystem error. */ - if (un->un_uppervp != NULLVP) { - FIXUP(un, p); + error = EROFS; + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); - } else { - error = EROFS; + union_unlock_upper(uppervp, p); } - return (error); } +/* + * union_getpages: + */ + +static int +union_getpages(struct vop_getpages_args *ap) +{ + int r; + + r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, ap->a_reqpage); + return(r); +} + +/* + * union_putpages: + */ + +static int +union_putpages(struct vop_putpages_args *ap) +{ + int r; + + r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + return(r); +} + static int union_read(ap) struct vop_read_args /* { @@ -866,18 +1056,19 @@ struct ucred *a_cred; } */ *ap; { - int error; + struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *uvp; + int error; - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); - if (dolock) - VOP_UNLOCK(vp, 0, p); + uvp = union_lock_other(un, p); + KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + + if (ap->a_vp->v_flag & VOBJBUF) + union_vm_coherency(ap->a_vp, ap->a_uio, 0); + + error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + union_unlock_other(uvp, p); /* * XXX @@ -889,7 +1080,7 @@ struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; - if (vp == un->un_uppervp) { + if (uvp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { if (cur > un->un_lowersz) union_newsize(ap->a_vp, VNOVAL, cur); } } - return (error); } @@ -910,17 +1100,36 @@ struct ucred *a_cred; } */ *ap; { - int error; - struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; + struct vnode *uppervp; + int error; - vp = UPPERVP(ap->a_vp); - if (vp == NULLVP) + if ((uppervp = union_lock_upper(un, p)) == NULLVP) panic("union: missing upper layer in write"); - FIXUP(un, p); - error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + /* + * Since our VM pages are associated with our vnode rather than + * the real vnode, and since we do not run our reads and writes + * through our own VM cache, we have a VM/VFS coherency problem. + * We solve it by invalidating or flushing the associated VM + * pages prior to allowing a normal read or write to occur. + * + * VM-backed writes (UIO_NOCOPY) have to be converted to normal + * writes because we are not cache-coherent. Normal writes need + * to be made coherent with our VM-backing store, which we do by + * first flushing any dirty VM pages associated with the write + * range, and then destroying any clean VM pages associated with + * the write range. 
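+ *
+ * To illustrate the range computation this implies (hypothetical
+ * values, 4K pages): a 100 byte write at uio_offset 8100 gives
+ * pgoff 4004, pstart 8100 / 4096 = 1 and
+ * pend 1 + (100 + 4004 + 4095) / 4096 = 3 in union_vm_coherency(),
+ * so pages 1 and 2 are cleaned and, for a normal write, removed.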
+ */ + + if (ap->a_uio->uio_segflg == UIO_NOCOPY) { + ap->a_uio->uio_segflg = UIO_SYSSPACE; + } else if (ap->a_vp->v_flag & VOBJBUF) { + union_vm_coherency(ap->a_vp, ap->a_uio, 1); + } + + error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the @@ -932,7 +1141,7 @@ union_write(ap) if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } - + union_unlock_upper(uppervp, p); return (error); } @@ -945,7 +1154,7 @@ union_lease(ap) int a_flag; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); @@ -962,7 +1171,7 @@ union_ioctl(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); @@ -977,7 +1186,7 @@ union_poll(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); @@ -1010,7 +1219,7 @@ union_mmap(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); @@ -1027,35 +1236,24 @@ union_fsync(ap) { int error = 0; struct proc *p = ap->a_p; - struct vnode *targetvp = OTHERVP(ap->a_vp); - struct union_node *un; - - if (targetvp != NULLVP) { - int dolock = (targetvp == LOWERVP(ap->a_vp)); - - un = VTOUNION(ap->a_vp); - if (dolock) - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - else { - un = VTOUNION(ap->a_vp); - if ((un->un_flags & UN_ULOCK) == 0 && - targetvp->v_data != NULL && - ((struct lock *)targetvp->v_data)->lk_lockholder - == curproc->p_pid && - VOP_ISLOCKED(targetvp) != 0) - return 0; /* XXX */ - - FIXUP(un, p); - } + struct vnode *targetvp; + struct union_node *un = VTOUNION(ap->a_vp); + if ((targetvp = union_lock_other(un, p)) != NULLVP) { error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); - if (dolock) - VOP_UNLOCK(targetvp, 0, p); + union_unlock_other(targetvp, p); } return (error); } +/* + * union_remove: + * + * Remove the specified cnp. The dvp and vp are passed to us locked + * and must remain locked on return. 
+ */ + static int union_remove(ap) struct vop_remove_args /* { @@ -1068,42 +1266,40 @@ union_remove(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + struct vnode *upperdvp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union remove: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_REMOVE(dvp, vp, cnp); + error = VOP_REMOVE(upperdvp, uppervp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + upperdvp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_link: + * + * tdvp will be locked on entry, vp will not be locked on entry. + * tdvp should remain locked on return and vp should remain unlocked + * on return. + */ + static int union_link(ap) struct vop_link_args /* { @@ -1119,43 +1315,56 @@ union_link(ap) struct vnode *tdvp; int error = 0; - if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); + if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(dun->un_uppervp, 0, p); + if (dun->un_flags & UN_ULOCK) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } } +#endif error = union_copyup(tun, 1, cnp->cn_cred, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { vn_lock(dun->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, p); + LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; } +#endif VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = dun->un_uppervp; - if (tdvp == NULLVP) - error = EROFS; - if (error) return (error); - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_tdvp, 0, p); + /* + * Make sure upper is locked, then unlock the union directory we were + * called with to avoid a deadlock while we are calling VOP_LINK on + * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp + * is expected to be locked on return. + */ - error = VOP_LINK(tdvp, vp, cnp); + if ((tdvp = union_lock_upper(dun, p)) == NULLVP) + return (EROFS); - dun->un_flags |= UN_ULOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); /* unlock calling node */ + error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + /* + * We have to unlock tdvp prior to relocking our calling node in + * order to avoid a deadlock. + */ + union_unlock_upper(tdvp, p); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } @@ -1171,12 +1380,16 @@ union_rename(ap) } */ *ap; { int error; - struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; + /* + * Figure out what fdvp to pass to our upper or lower vnode. If we + * replace the fdvp, release the original one and ref the new one. 
+ */ + if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { /* XXX: should do a copyup */ error = EXDEV; goto bad; } - fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } + /* + * Figure out what fvp to pass to our upper or lower vnode. If we + * replace the fvp, release the original one and ref the new one. + */ + if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); +#if 0 + struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); +#endif + if (un->un_uppervp == NULLVP) { - /* XXX: should do a copyup */ - error = EXDEV; - goto bad; + switch(fvp->v_type) { + case VREG: + vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc); + VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; + case VDIR: + /* + * XXX not yet. + * + * There is only one way to rename a directory + * based in the lowervp, and that is to copy + * the entire directory hierarchy. Otherwise + * it would not last across a reboot. + */ +#if 0 + vrele(fvp); + fvp = NULL; + vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_mkshadow(um, fdvp, + ap->a_fcnp, &un->un_uppervp); + VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc); + if (un->un_uppervp) + VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; +#endif + default: + error = EXDEV; + goto bad; + } } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; - fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } + /* + * Figure out what tdvp (destination directory) to pass to the + * lower level. If we replace it with uppervp, we need to vput the + * old one. The exclusive lock is transferred to what we will pass + * down in the VOP_RENAME and we replace uppervp with a simple + * reference. + */ + if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); + if (un->un_uppervp == NULLVP) { /* * this should never happen in normal @@ -1224,32 +1484,52 @@ goto bad; } - tdvp = un->un_uppervp; - VREF(tdvp); - un->un_flags |= UN_KLOCK; + /* + * new tdvp is a lock and reference on uppervp, put away + * the old tdvp. + */ + tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tdvp); } + /* + * Figure out what tvp (destination file) to pass to the + * lower level. + * + * If the uppervp file does not exist, put away the (wrong) + * file and change tvp to NULL. + */ + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); - tvp = un->un_uppervp; - if (tvp != NULLVP) { - VREF(tvp); - un->un_flags |= UN_KLOCK; - } + tvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tvp); + /* note: tvp may be NULL */ } + /* + * VOP_RENAME releases/vputs prior to returning, so we have no + * cleanup to do. + */ + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + /* + * Error. We still have to release / vput the various elements. 
+ */ + bad: vrele(fdvp); - vrele(fvp); + if (fvp) + vrele(fvp); vput(tdvp); - if (tvp != NULLVP) - vput(tvp); - + if (tvp != NULLVP) { + if (tvp != tdvp) + vput(tvp); + else + vrele(tvp); + } return (error); } @@ -1263,34 +1543,26 @@ union_mkdir(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); + union_unlock_upper(upperdvp, p); + + if (error == 0) { + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, + ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, - NULLVP, cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - - return (error); } - - return (EROFS); + return (error); } static int @@ -1305,42 +1577,34 @@ union_rmdir(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + struct vnode *uppervp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union rmdir: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_RMDIR(dvp, vp, ap->a_cnp); -#if 0 - /* XXX */ - if (!error) - union_removed_upper(un); -#endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_symlink: + * + * dvp is locked on entry and remains locked on return. a_vpp is garbage + * (unused). 
+ */ + static int union_symlink(ap) struct vop_symlink_args /* { @@ -1352,24 +1616,20 @@ union_symlink(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); - dun->un_flags |= UN_ULOCK; + /* vp is garbage whether an error occurs or not */ *ap->a_vpp = NULLVP; - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } /* @@ -1391,15 +1651,16 @@ union_readdir(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error = 0; - if (uvp == NULLVP) - return (0); - - FIXUP(un, p); - ap->a_vp = uvp; - return (VCALL(uvp, VOFFSET(vop_readdir), ap)); + if ((uvp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = uvp; + error = VCALL(uvp, VOFFSET(vop_readdir), ap); + union_unlock_upper(uvp, p); + } + return(error); } static int @@ -1411,23 +1672,28 @@ union_readlink(ap) } */ *ap; { int error; + struct union_node *un = VTOUNION(ap->a_vp); struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_readlink: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } +/* + * union_abortop: + * + * dvp is locked on entry and left locked on return + * + */ + static int union_abortop(ap) struct vop_abortop_args /* { @@ -1435,28 +1701,35 @@ union_abortop(ap) struct componentname *a_cnp; } */ *ap; { - int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); - int islocked = un->un_flags & UN_LOCKED; - int dolock = (vp == LOWERVP(ap->a_dvp)); + int islocked = VOP_ISLOCKED(ap->a_dvp); + struct vnode *vp; + int error; if (islocked) { - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_dvp), p); + vp = union_lock_other(un, p); + } else { + vp = OTHERVP(ap->a_dvp); } + KASSERT(vp != NULL, ("union_abortop: backing vnode missing!")); + ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); - if (islocked && dolock) - VOP_UNLOCK(vp, 0, p); + + if (islocked) + union_unlock_other(vp, p); return (error); } +/* + * union_inactive: + * + * Called with the vnode locked. We are expected to unlock the vnode. 
+ */ + static int union_inactive(ap) struct vop_inactive_args /* { @@ -1485,10 +1758,17 @@ union_inactive(ap) if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); - free(un->un_dircache, M_TEMP); + free (un->un_dircache, M_TEMP); un->un_dircache = 0; } +#if 0 + if ((un->un_flags & UN_ULOCK) && un->un_uppervp) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +#endif + VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) @@ -1503,7 +1783,6 @@ union_reclaim(ap) struct vnode *a_vp; } */ *ap; { - union_freevp(ap->a_vp); return (0); @@ -1513,75 +1792,47 @@ static int union_lock(ap) struct vop_lock_args *ap; { +#if 0 struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; +#endif int error; - vop_nolock(ap); - /* - * Need to do real lockmgr-style locking here. - * in the mean time, draining won't work quite right, - * which could lead to a few race conditions. - * the following test was here, but is not quite right, we - * still need to take the lock: - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - */ - flags &= ~LK_INTERLOCK; - -start: + error = vop_stdlock(ap); +#if 0 un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) { - if (((un->un_flags & UN_ULOCK) == 0) && - (vp->v_usecount != 0)) { - error = vn_lock(un->un_uppervp, flags, p); - if (error) - return (error); - un->un_flags |= UN_ULOCK; + if (error == 0) { + /* + * Lock the upper if it exists and this is an exclusive lock + * request. + */ + if (un->un_uppervp != NULLVP && + (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) { + if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) { + struct vop_unlock_args uap = { 0 }; + uap.a_vp = ap->a_vp; + uap.a_flags = ap->a_flags; + uap.a_p = ap->a_p; + vop_stdunlock(&uap); + return (error); + } + un->un_flags |= UN_ULOCK; + } } -#ifdef DIAGNOSTIC - if (un->un_flags & UN_KLOCK) { - vprint("dangling upper lock", vp); - panic("union: dangling upper lock"); - } -#endif } - - if (un->un_flags & UN_LOCKED) { -#ifdef DIAGNOSTIC - if (curproc && un->un_pid == curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: locking against myself"); #endif - un->un_flags |= UN_WANT; - tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); - goto start; - } - -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif - - un->un_flags |= UN_LOCKED; - return (0); + return (error); } /* - * When operations want to vput() a union node yet retain a lock on - * the upper vnode (say, to do some further operations like link(), - * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() - * unlocks the union node (leaving the upper vnode alone), clears the - * KLOCK flag, and then returns to vput(). The caller then does whatever - * is left to do with the upper vnode, and ensures that it gets unlocked. + * union_unlock: * - * If UN_KLOCK isn't set, then the upper vnode is unlocked here. + * Unlock our union node. This also unlocks uppervp. 
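 *
 * This only works because un_lock is the first member of struct
 * union_node: vop_stdlock/vop_stdunlock presumably resolve the lock
 * by casting v_data, roughly
 *
 *	lockmgr((struct lock *)vp->v_data, flags, &vp->v_interlock, p);
 *
 * which lands on &un->un_lock for a union vnode.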
*/ static int union_unlock(ap) @@ -1592,36 +1843,38 @@ union_unlock(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; + int error; -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: unlock unlocked node"); - if (curproc && un->un_pid != curproc->p_pid && - curproc->p_pid > -1 && un->un_pid > -1) - panic("union: unlocking other process's union node"); -#endif + KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0")); - un->un_flags &= ~UN_LOCKED; + error = vop_stdunlock(ap); +#if 0 - if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) - VOP_UNLOCK(un->un_uppervp, 0, p); + /* + * If no exclusive locks remain and we are holding an uppervp lock, + * remove the uppervp lock. + */ - un->un_flags &= ~(UN_ULOCK|UN_KLOCK); - - if (un->un_flags & UN_WANT) { - un->un_flags &= ~UN_WANT; - wakeup((caddr_t) &un->un_flags); + if ((un->un_flags & UN_ULOCK) && + lockstatus(&un->un_lock) != LK_EXCLUSIVE) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p); } - -#ifdef DIAGNOSTIC - un->un_pid = 0; #endif - vop_nounlock(ap); - - return (0); + return(error); } +/* + * union_bmap: + * + * There isn't much we can do. We cannot push through to the real vnode + * to get to the underlying device because this will bypass data + * cached by the real vnode. + * + * For some reason we cannot return the 'real' vnode either, it seems + * to blow up memory maps. + */ + static int union_bmap(ap) struct vop_bmap_args /* { @@ -1633,21 +1886,7 @@ union_bmap(ap) int *a_runb; } */ *ap; { - int error; - struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); - - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_bmap), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); - - return (error); + return(EOPNOTSUPP); } static int @@ -1668,16 +1907,6 @@ union_print(ap) return (0); } -static int -union_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0); -} - static int union_pathconf(ap) struct vop_pathconf_args /* { @@ -1688,17 +1917,15 @@ union_pathconf(ap) { int error; struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } @@ -1722,6 +1949,8 @@ union_advlock(ap) /* * XXX - vop_strategy must be hand coded because it has no + * YYY - and it is not coherent with anything + * * vnode in its arguments. * This goes away with a merged VM/buffer cache. 
*/ @@ -1742,7 +1971,6 @@ union_strategy(ap) (othervp == LOWERVP(bp->b_vp))) panic("union_strategy: writing to lowervp"); #endif - return (VOP_STRATEGY(othervp, bp)); } @@ -1759,10 +1987,12 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_close_desc, (vop_t *) union_close }, { &vop_create_desc, (vop_t *) union_create }, { &vop_fsync_desc, (vop_t *) union_fsync }, + { &vop_getpages_desc, (vop_t *) union_getpages }, + { &vop_putpages_desc, (vop_t *) union_putpages }, { &vop_getattr_desc, (vop_t *) union_getattr }, { &vop_inactive_desc, (vop_t *) union_inactive }, { &vop_ioctl_desc, (vop_t *) union_ioctl }, - { &vop_islocked_desc, (vop_t *) union_islocked }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lease_desc, (vop_t *) union_lease }, { &vop_link_desc, (vop_t *) union_link }, { &vop_lock_desc, (vop_t *) union_lock }, diff --git a/sys/miscfs/union/union.h b/sys/miscfs/union/union.h index 1fc5f996cab0..02a897101472 100644 --- a/sys/miscfs/union/union.h +++ b/sys/miscfs/union/union.h @@ -49,8 +49,8 @@ struct union_args { #define UNMNT_OPMASK 0x0003 struct union_mount { - struct vnode *um_uppervp; - struct vnode *um_lowervp; + struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ + struct vnode *um_lowervp; /* Left unlocked */ struct ucred *um_cred; /* Credentials of user calling mount */ int um_cmode; /* cmask from mount process */ int um_op; /* Operation mode */ @@ -58,6 +58,10 @@ struct union_mount { #ifdef KERNEL +#ifndef DIAGNOSTIC +#define DIAGNOSTIC +#endif + /* * DEFDIRMODE is the mode bits used to create a shadow directory. */ @@ -67,9 +71,14 @@ struct union_mount { #define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) /* - * A cache of vnode references + * A cache of vnode references (hangs off v_data) + * + * Placing un_lock as the first elements theoretically allows us to + * use the vop_stdlock functions. However, we need to make sure of + * certain side effects so we will still punch in our own code. 
*/ struct union_node { + struct lock un_lock; LIST_ENTRY(union_node) un_cache; /* Hash chain */ struct vnode *un_vnode; /* Back pointer */ struct vnode *un_uppervp; /* overlaying object */ @@ -79,6 +88,7 @@ struct union_node { char *un_path; /* saved component name */ int un_hash; /* saved un_path hash value */ int un_openl; /* # of opens on lowervp */ + int un_exclcnt; /* exclusive count */ unsigned int un_flags; struct vnode **un_dircache; /* cached union stack */ off_t un_uppersz; /* size of upper object */ @@ -88,14 +98,25 @@ struct union_node { #endif }; -#define UN_WANT 0x01 -#define UN_LOCKED 0x02 -#define UN_ULOCK 0x04 /* Upper node is locked */ -#define UN_KLOCK 0x08 /* Keep upper node locked on vput */ -#define UN_CACHED 0x10 /* In union cache */ +/* + * XXX UN_ULOCK - indicates that the uppervp is locked + * + * UN_CACHED - node is in the union cache + */ + +/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ +#define UN_CACHED 0x10 /* In union cache */ + +/* + * Hash table locking flags + */ + +#define UNVP_WANT 0x01 +#define UNVP_LOCKED 0x02 extern int union_allocvp __P((struct vnode **, struct mount *, - struct vnode *, struct vnode *, + struct vnode *, + struct vnode *, struct componentname *, struct vnode *, struct vnode *, int)); extern int union_freevp __P((struct vnode *)); @@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *, extern void union_removed_upper __P((struct union_node *un)); extern struct vnode *union_lowervp __P((struct vnode *)); extern void union_newsize __P((struct vnode *, off_t, off_t)); +extern void union_vm_coherency __P((struct vnode *, struct uio *, int)); extern int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *)); @@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **, #define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) #define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) +#define UDEBUG(x) if (uniondebug) printf x +#define UDEBUG_ENABLED 1 + extern vop_t **union_vnodeop_p; extern struct vfsops union_vfsops; +extern int uniondebug; + #endif /* KERNEL */ diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c index ed09a65fbb4a..c03153c4894b 100644 --- a/sys/miscfs/union/union_subr.c +++ b/sys/miscfs/union/union_subr.c @@ -53,6 +53,7 @@ #include #include /* for vnode_pager_setsize */ #include +#include /* for vm cache coherency */ #include #include @@ -97,7 +98,7 @@ union_init() for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); - bzero((caddr_t) unvplock, sizeof(unvplock)); + bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } @@ -105,15 +106,12 @@ static int union_list_lock(ix) int ix; { - - if (unvplock[ix] & UN_LOCKED) { - unvplock[ix] |= UN_WANT; + if (unvplock[ix] & UNVP_LOCKED) { + unvplock[ix] |= UNVP_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } - - unvplock[ix] |= UN_LOCKED; - + unvplock[ix] |= UNVP_LOCKED; return (0); } @@ -121,15 +119,25 @@ static void union_list_unlock(ix) int ix; { + unvplock[ix] &= ~UNVP_LOCKED; - unvplock[ix] &= ~UN_LOCKED; - - if (unvplock[ix] & UN_WANT) { - unvplock[ix] &= ~UN_WANT; + if (unvplock[ix] & UNVP_WANT) { + unvplock[ix] &= ~UNVP_WANT; wakeup((caddr_t) &unvplock[ix]); } } +/* + * union_updatevp: + * + * The uppervp, if not NULL, must be referenced and not locked by us + * The lowervp, if not NULL, must be referenced. + * + * if uppervp and lowervp match pointers already installed, nothing + * happens. The passed vp's (when matching) are not adjusted. 
This + * routine may only be called by union_newupper() and union_newlower(). + */ + static void union_updatevp(un, uppervp, lowervp) struct union_node *un; @@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp) uhash = nhash; } - if (lhash != uhash) + if (lhash != uhash) { while (union_list_lock(lhash)) continue; + } while (union_list_lock(uhash)) continue; @@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp) free(un->un_path, M_TEMP); un->un_path = 0; } - if (un->un_dirvp) { - vrele(un->un_dirvp); - un->un_dirvp = NULLVP; - } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; @@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp) if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); - un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } @@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp) union_list_unlock(nhash); } +/* + * Set a new lowervp. The passed lowervp must be referenced and will be + * stored in the vp in a referenced state. + */ + static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { - union_updatevp(un, un->un_uppervp, lowervp); } +/* + * Set a new uppervp. The passed uppervp must be locked and will be + * stored in the vp in a locked state. The caller should not unlock + * uppervp. + */ + static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { - union_updatevp(un, uppervp, un->un_lowervp); } @@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz) } if (sz != VNOVAL) { -#ifdef DEBUG - printf("union: %s size now %ld\n", - uppersz != VNOVAL ? "upper" : "lower", (long) sz); -#endif + UDEBUG(("union: %s size now %ld\n", + (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); vnode_pager_setsize(vp, sz); } } /* - * allocate a union_node/vnode pair. the vnode is - * referenced and locked. the new vnode is returned - * via (vpp). (mp) is the mountpoint of the union filesystem, - * (dvp) is the parent directory where the upper layer object - * should exist (but doesn't) and (cnp) is the componentname - * information which is partially copied to allow the upper - * layer object to be created at a later time. (uppervp) - * and (lowervp) reference the upper and lower layer objects - * being mapped. either, but not both, can be nil. - * if supplied, (uppervp) is locked. - * the reference is either maintained in the new union_node - * object which is allocated, or they are vrele'd. + * union_allocvp: allocate a union_node and associate it with a + * parent union_node and one or two vnodes. + * + * vpp Holds the returned vnode locked and referenced if no + * error occurs. + * + * mp Holds the mount point. mp may or may not be busied. + * allocvp makes no changes to mp. + * + * dvp Holds the parent union_node to the one we wish to create. + * XXX may only be used to traverse an uncopied lowervp-based + * tree? XXX + * + * dvp may or may not be locked. allocvp makes no changes + * to dvp. + * + * upperdvp Holds the parent vnode to uppervp, generally used along + * with path component information to create a shadow of + * lowervp when uppervp does not exist. + * + * upperdvp is referenced but unlocked on entry, and will be + * dereferenced on return. + * + * uppervp Holds the new uppervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * lowervp Holds the new lowervp vnode to be stored in the + * union_node we are allocating. 
lowervp is referenced but + * not locked, and will be dereferenced on return. + * + * cnp Holds path component information to be coupled with + * lowervp and upperdvp to allow unionfs to create an uppervp + * later on. Only used if lowervp is valid. The contents + * of cnp are only valid for the duration of the call. + * + * docache Determine whether this node should be entered in the + * cache or whether it should be destroyed as soon as possible. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot @@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz) * zero references to it and so it needs to removed from * the vnode free list. */ + int -union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) +union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; - struct vnode *undvp; /* parent union vnode */ - struct vnode *dvp; /* may be null */ + struct vnode *dvp; /* parent union vnode */ + struct vnode *upperdvp; /* parent vnode of uppervp */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ @@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct proc *p = (cnp) ? cnp->cn_proc : curproc; int hash = 0; int vflag; int try; @@ -382,65 +421,76 @@ loop: if (un) { /* - * Obtain a lock on the union_node. - * uppervp is locked, though un->un_uppervp - * may not be. this doesn't break the locking - * hierarchy since in the case that un->un_uppervp - * is not yet locked it will be vrele'd and replaced - * with uppervp. + * Obtain a lock on the union_node. Everything is unlocked + * except for dvp, so check that case. If they match, our + * new un is already locked. Otherwise we have to lock our + * new un. + * + * A potential deadlock situation occurs when we are holding + * one lock while trying to get another. We must follow + * strict ordering rules to avoid it. We try to locate dvp + * by scanning up from un_vnode, since the most likely + * scenario is un being under dvp. */ - if ((dvp != NULLVP) && (uppervp == dvp)) { - /* - * Access ``.'', so (un) will already - * be locked. Since this process has - * the lock on (uppervp) no other - * process can hold the lock on (un). - */ -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: . not locked"); - else if (curproc && un->un_pid != curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: allocvp not lock owner"); -#endif - } else { - if (un->un_flags & UN_LOCKED) { - vrele(UNIONTOV(un)); - un->un_flags |= UN_WANT; - (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); - goto loop; - } - un->un_flags |= UN_LOCKED; + if (dvp && un->un_vnode != dvp) { + struct vnode *scan = un->un_vnode; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + do { + scan = VTOUNION(scan)->un_pvp; + } while (scan && scan->v_tag == VT_UNION && scan != dvp); + if (scan != dvp) { + /* + * our new un is above dvp (we never saw dvp + * while moving up the tree). 
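+ *
+ * Sketch of the deadlock being avoided: process A holds dvp
+ * locked and sleeps locking un->un_vnode while process B holds
+ * un->un_vnode locked and sleeps locking dvp; neither ever
+ * wakes. When un is not known to be below dvp we therefore
+ * drop dvp, lock un, and only then re-lock dvp, as in the
+ * VREF/VOP_UNLOCK/vn_lock sequence below.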
+ */ + VREF(dvp); + VOP_UNLOCK(dvp, 0, p); + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(dvp); + } else { + /* + * our new un is under dvp + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } + } else if (dvp == NULLVP) { + /* + * dvp is NULL, we need to lock un. + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } else { + /* + * dvp == un->un_vnode, we are already locked. + */ + error = 0; } - /* - * At this point, the union_node is locked, - * un->un_uppervp may not be locked, and uppervp - * is locked or nil. - */ + if (error) + goto loop; /* - * Save information about the upper layer. + * At this point, the union_node is locked and referenced. + * + * uppervp is locked and referenced or NULL, lowervp is + * referenced or NULL. */ + UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", + un, un->un_vnode, un->un_uppervp, + (un->un_uppervp ? un->un_uppervp->v_usecount : -99), + uppervp, + (uppervp ? uppervp->v_usecount : -99) + )); + if (uppervp != un->un_uppervp) { + KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); union_newupper(un, uppervp); } else if (uppervp) { + KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); vrele(uppervp); } - if (un->un_uppervp) { - un->un_flags |= UN_ULOCK; - un->un_flags &= ~UN_KLOCK; - } - /* * Save information about the lower layer. * This needs to keep track of pathname @@ -456,12 +506,22 @@ loop: bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } + + /* + * and upperdvp + */ + if (upperdvp != un->un_dirvp) { + if (un->un_dirvp) + vrele(un->un_dirvp); + un->un_dirvp = upperdvp; + } else if (upperdvp) { + vrele(upperdvp); + } + + *vpp = UNIONTOV(un); return (0); } @@ -477,17 +537,22 @@ loop: goto loop; } + /* + * Create new node rather than replace old node + */ + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { - if (uppervp) { - if (dvp == uppervp) - vrele(uppervp); - else - vput(uppervp); - } + /* + * If an error occurs clear out vnodes. 
+ */ if (lowervp) vrele(lowervp); - + if (uppervp) + vrele(uppervp); + if (upperdvp) + vrele(upperdvp); + *vpp = NULL; goto out; } @@ -499,37 +564,34 @@ loop: (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; + un = VTOUNION(*vpp); + bzero(un, sizeof(*un)); + + lockinit(&un->un_lock, PVFS, "unlock", 0, 0); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; - un->un_pvp = undvp; - if (undvp != NULLVP) - VREF(undvp); + un->un_dirvp = upperdvp; + un->un_pvp = dvp; /* only parent dir in new allocation */ + if (dvp != NULLVP) + VREF(dvp); un->un_dircache = 0; un->un_openl = 0; - un->un_flags = UN_LOCKED; - if (un->un_uppervp) - un->un_flags |= UN_ULOCK; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; - un->un_dirvp = 0; + un->un_dirvp = NULL; } if (docache) { @@ -537,10 +599,10 @@ loop: un->un_flags |= UN_CACHED; } +out: if (xlowervp) vrele(xlowervp); -out: if (docache) union_list_unlock(hash); @@ -558,16 +620,26 @@ union_freevp(vp) LIST_REMOVE(un, un_cache); } - if (un->un_pvp != NULLVP) + if (un->un_pvp != NULLVP) { vrele(un->un_pvp); - if (un->un_uppervp != NULLVP) + un->un_pvp = NULL; + } + if (un->un_uppervp != NULLVP) { vrele(un->un_uppervp); - if (un->un_lowervp != NULLVP) + un->un_uppervp = NULL; + } + if (un->un_lowervp != NULLVP) { vrele(un->un_lowervp); - if (un->un_dirvp != NULLVP) + un->un_lowervp = NULL; + } + if (un->un_dirvp != NULLVP) { vrele(un->un_dirvp); - if (un->un_path) + un->un_dirvp = NULL; + } + if (un->un_path) { free(un->un_path, M_TEMP); + un->un_path = NULL; + } FREE(vp->v_data, M_TEMP); vp->v_data = 0; @@ -579,6 +651,9 @@ * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. + * + * fvp and tvp are both exclusively locked on call, but their refcounts + * haven't been bumped at all. */ static int union_copyfile(fvp, tvp, cred, p) @@ -600,48 +675,62 @@ * give up at the first sign of trouble. */ + bzero(&uio, sizeof(uio)); + uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; - VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; + int count; + int bufoffset; + /* + * Setup for big read + */ uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; - error = VOP_READ(fvp, &uio, 0, cred); - if (error == 0) { + if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) + break; + + /* + * Get bytes read, handle read eof case and setup for + * write loop + */ + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + bufoffset = 0; + + /* + * Write until an error occurs or our buffer has been + * exhausted, then update the offset for the next read. 
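+ *
+ * Worked example (hypothetical numbers): a read that returns with
+ * uio_resid == 1024 yields count == 64512 (assuming MAXBSIZE is
+ * 65536). If the first VOP_WRITE consumes only 32000 bytes,
+ * bufoffset advances to 32000 and the next write starts at
+ * buf + 32000 with file offset offset + 32000, until
+ * bufoffset == count; the next read then starts at offset + 64512.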
+ */ + while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXBSIZE - uio.uio_resid; - uio.uio_offset = offset; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; - if (uio.uio_resid == 0) + if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) break; - - do { - error = VOP_WRITE(tvp, &uio, 0, cred); - } while ((uio.uio_resid > 0) && (error == 0)); + bufoffset += (count - bufoffset) - uio.uio_resid; } - + uio.uio_offset = offset + bufoffset; } while (error == 0); free(buf, M_TEMP); @@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p) } /* - * (un) is assumed to be locked on entry and remains - * locked on exit. + * + * un's vnode is assumed to be locked on entry and remains locked on exit. */ + int union_copyup(un, docopy, cred, p) struct union_node *un; @@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p) if (error) return (error); - /* at this point, uppervp is locked */ - union_newupper(un, uvp); - un->un_flags |= UN_ULOCK; - lvp = un->un_lowervp; + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); if (docopy) { /* * XX - should not ignore errors @@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p) */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, cred); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } -#ifdef DEBUG if (error == 0) - uprintf("union: copied up %s\n", un->un_path); -#endif + UDEBUG(("union: copied up %s\n", un->un_path)); } - un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); + union_newupper(un, uvp); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); union_vn_close(uvp, FWRITE, cred, p); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; - + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the @@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p) (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } + if (vn_canvmio(uvp) == TRUE) + error = vfs_object_create(uvp, p, cred); un->un_openl = 0; } @@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p) } +/* + * union_relookup: + * + * dvp should be locked on entry and will be locked on return. No + * net change in the ref count will occur. + * + * If an error is returned, *vpp will be invalid, otherwise it + * will hold a locked, referenced vnode. If *vpp == dvp then + * remember that only one exclusive lock is held. + */ + static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; @@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; @@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_consume = cnp->cn_consume; VREF(dvp); - error = relookup(dvp, vpp, cn); - if (!error) - vrele(dvp); - else { + VOP_UNLOCK(dvp, 0, cnp->cn_proc); + + /* + * Pass dvp unlocked and referenced on call to relookup(). 
+ * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + + if ((error = relookup(dvp, vpp, cn)) != 0) { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = NULL; + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); + return(error); } - return (error); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + * + * We want to return with dvp as it was passed to us, so we get + * rid of our reference. + */ + vrele(dvp); + return (0); } /* @@ -785,11 +899,11 @@ * * (um) points to the union mount structure for access to * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory. - * it is unlocked on entry and exit. + * (dvp) is the directory in which to create the shadow directory, + * it is locked (but not ref'd) on entry and return. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which - * is returned locked. + * is returned locked and ref'd */ int union_mkshadow(um, dvp, cnp, vpp) @@ -810,8 +924,10 @@ if (*vpp) { VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp, 0, p); - vrele(*vpp); + if (dvp == *vpp) + vrele(*vpp); + else + vput(*vpp); *vpp = NULLVP; return (EEXIST); } @@ -832,7 +948,7 @@ VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); - vput(dvp); + /*vput(dvp);*/ return (error); } @@ -842,7 +958,7 @@ * (um) points to the union mount structure for access to * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. - * it is locked on entry and exit. + * it is locked on entry and return. * (cnp) is the componentname to be created. */ int @@ -857,17 +973,16 @@ struct vnode *wvp; struct componentname cn; - VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) return (error); - } if (wvp) { VOP_ABORTOP(dvp, &cn); - vrele(dvp); - vrele(wvp); + if (wvp == dvp) + vrele(wvp); + else + vput(wvp); return (EEXIST); } @@ -877,9 +992,6 @@ error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); - - vrele(dvp); - return (error); } @@ -890,6 +1002,12 @@ * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. + * + * On entry, the vnode associated with un is locked. It remains locked + * on return. + * + * If no error occurs, *vpp contains a locked referenced vnode for your + * use. If an error occurs *vpp is undefined. */ static int union_vn_create(vpp, un, p) @@ -921,26 +1039,34 @@ cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; - cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. 
+ */ VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); - vrele(un->un_dirvp); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + */ if (vp) { + vput(un->un_dirvp); VOP_ABORTOP(un->un_dirvp, &cn); - if (un->un_dirvp == vp) - vrele(un->un_dirvp); + if (vp == un->un_dirvp) + vrele(vp); else - vput(un->un_dirvp); - vrele(vp); + vput(vp); return (EEXIST); } @@ -964,11 +1090,12 @@ return (error); error = VOP_OPEN(vp, fmode, cred, p); + if (error == 0 && vn_canvmio(vp) == TRUE) + error = vfs_object_create(vp, p, cred); if (error) { vput(vp); return (error); } - vp->v_writecount++; *vpp = vp; return (0); @@ -987,6 +1114,14 @@ return (VOP_CLOSE(vp, fmode, cred, p)); } +#if 0 + +/* + * union_removed_upper: + * + * called with union_node unlocked. XXX + */ + void union_removed_upper(un) struct union_node *un; @@ -999,9 +1134,7 @@ * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referenced. */ -#if 0 union_newupper(un, NULLVP); -#endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); @@ -1013,28 +1146,8 @@ un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } - - if (un->un_flags & UN_ULOCK) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); - } } -#if 0 -struct vnode * -union_lowervp(vp) - struct vnode *vp; -{ - struct union_node *un = VTOUNION(vp); - - if ((un->un_lowervp != NULLVP) && - (vp->v_type == un->un_lowervp->v_type)) { - if (vget(un->un_lowervp, 0) == 0) - return (un->un_lowervp); - } - - return (NULLVP); -} #endif /* @@ -1104,13 +1217,12 @@ union_dircache(vp, p) nvp = NULLVP; - if (dircache == 0) { + if (dircache == NULL) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; - dircache = (struct vnode **) - malloc(cnt * sizeof(struct vnode *), - M_TEMP, M_WAITOK); + dircache = malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; @@ -1126,9 +1238,11 @@ if (*vpp == NULLVP) goto out; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ + UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); VREF(*vpp); - error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); + UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); if (error) goto out; @@ -1141,6 +1255,40 @@ out: return (nvp); } +/* + * Guarantee coherency with the VM cache by invalidating any clean VM pages + * associated with this write and updating any dirty VM pages. Since our + * vnode is locked, other processes will not be able to read the pages in + * again until after our write completes. + * + * We also have to be coherent with reads, by flushing any pending dirty + * pages prior to issuing the read. + * + * XXX this is somewhat of a hack at the moment. To support this properly + * we would have to be able to run VOP_READ and VOP_WRITE through the VM + * cache. Then we wouldn't need to worry about coherency. 
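+ *
+ * Usage recap, as in union_read() and union_write() earlier in this
+ * change:
+ *
+ *	union_vm_coherency(vp, uio, 0);	/* read: flush dirty pages */
+ *	union_vm_coherency(vp, uio, 1);	/* write: flush, then remove */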
+ */ + +void +union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls) +{ + vm_object_t object; + vm_pindex_t pstart; + vm_pindex_t pend; + int pgoff; + + if ((object = vp->v_object) == NULL) + return; + + pgoff = uio->uio_offset & PAGE_MASK; + pstart = uio->uio_offset / PAGE_SIZE; + pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE; + + vm_object_page_clean(object, pstart, pend, OBJPC_SYNC); + if (cleanfls) + vm_object_page_remove(object, pstart, pend, TRUE); +} + /* * Module glue to remove #ifdef UNION from vfs_syscalls.c */ @@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, fp->f_cred); if (error) { vput(lvp); return (error); } @@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data) } return 0; } + static moduledata_t union_mod = { "union_dircheck", union_modevent, NULL }; + DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/miscfs/union/union_vfsops.c b/sys/miscfs/union/union_vfsops.c index af828ac64a8c..1a53f88bcc7b 100644 --- a/sys/miscfs/union/union_vfsops.c +++ b/sys/miscfs/union/union_vfsops.c @@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p) int len; u_int size; -#ifdef DEBUG - printf("union_mount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); /* * Disable clustered write, otherwise system becomes unstable. @@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p) if (error) goto bad; + /* + * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. + * We need to reference it but not lock it. + */ + lowerrootvp = mp->mnt_vnodecovered; VREF(lowerrootvp); +#if 0 /* * Unlock lower node to avoid deadlock. */ if (lowerrootvp->v_op == union_vnodeop_p) VOP_UNLOCK(lowerrootvp, 0, p); +#endif /* - * Find upper node. + * Obtain upper vnode by calling namei() on the path. The + * upperrootvp will be returned referenced but not locked. */ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, UIO_USERSPACE, args.target, p); error = namei(ndp); + +#if 0 if (lowerrootvp->v_op == union_vnodeop_p) vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); +#endif if (error) goto bad; @@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p) vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; + UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp))); + /* * Check multi union mount to avoid `lock myself again' panic. + * Also require that it be a directory. */ if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { #ifdef DIAGNOSTIC @@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p) goto bad; } - um = (struct union_mount *) malloc(sizeof(struct union_mount), - M_UNIONFSMNT, M_WAITOK); /* XXX */ - /* - * Keep a held reference to the target vnodes. - * They are vrele'd in union_unmount. - * - * Depending on the _BELOW flag, the filesystems are - * viewed in a different order. In effect, this is the - * same as providing a mount under option to the mount syscall. + * Allocate our union_mount structure and populate the fields. + * The vnode references are stored in the union_mount as held, + * unlocked references. Depending on the _BELOW flag, the + * filesystems are viewed in a different order. In effect this + * is the same as providing a mount-under option to the mount + * syscall.
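+ * + * For example (assuming the stock mount_union(8) front end; the command names are not part of this change): "mount_union /upper /mnt" stacks /upper above /mnt (the UNMNT_ABOVE case), while "mount_union -b /lower /mnt" views /lower beneath /mnt (the UNMNT_BELOW case).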
*/ + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); + + bzero(um, sizeof(struct union_mount)); + um->um_op = args.mntflags & UNMNT_OPMASK; + switch (um->um_op) { case UNMNT_ABOVE: um->um_lowervp = lowerrootvp; um->um_uppervp = upperrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_BELOW: um->um_lowervp = upperrootvp; um->um_uppervp = lowerrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_REPLACE: vrele(lowerrootvp); - lowerrootvp = NULLVP; + lowerrootvp = NULL; um->um_uppervp = upperrootvp; um->um_lowervp = lowerrootvp; + upperrootvp = NULL; break; default: @@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p) * supports whiteout operations */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); if (error) goto bad; } @@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p) (void)union_statfs(mp, &mp->mnt_stat, p); -#ifdef DEBUG - printf("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); -#endif + UDEBUG(("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); return (0); bad: - if (um) + if (um) { + if (um->um_uppervp) + vrele(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + /* XXX other fields */ free(um, M_UNIONFSMNT); + } if (cred) crfree(cred); if (upperrootvp) @@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p) int freeing; int flags = 0; -#ifdef DEBUG - printf("union_unmount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; @@ -365,55 +387,25 @@ union_root(mp, vpp) struct mount *mp; struct vnode **vpp; { - struct proc *p = curproc; /* XXX */ struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; - int loselock; - int lockadj = 0; - - if (um->um_lowervp && um->um_op != UNMNT_BELOW && - VOP_ISLOCKED(um->um_lowervp)) { - VREF(um->um_lowervp); - VOP_UNLOCK(um->um_lowervp, 0, p); - lockadj = 1; - } /* - * Return locked reference to root. + * Supply an unlocked reference to um_uppervp and to um_lowervp. It + * is possible for um_uppervp to be locked without the associated + * root union_node being locked. We let union_allocvp() deal with + * it. 
*/ + UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); + VREF(um->um_uppervp); - if ((um->um_op == UNMNT_BELOW) && - VOP_ISLOCKED(um->um_uppervp)) { - loselock = 1; - } else { - vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - loselock = 0; - } if (um->um_lowervp) VREF(um->um_lowervp); - error = union_allocvp(vpp, mp, - (struct vnode *) 0, - (struct vnode *) 0, - (struct componentname *) 0, - um->um_uppervp, - um->um_lowervp, - 1); - if (error) { - if (loselock) - vrele(um->um_uppervp); - else - vput(um->um_uppervp); - if (um->um_lowervp) - vrele(um->um_lowervp); - } else { - if (loselock) - VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; - } - if (lockadj) { - vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p); - vrele(um->um_lowervp); - } + error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, + um->um_uppervp, um->um_lowervp, 1); + UDEBUG(("error %d\n", error)); + UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); return (error); } @@ -429,10 +421,8 @@ union_statfs(mp, sbp, p) struct statfs mstat; int lbsize; -#ifdef DEBUG - printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", - (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp); -#endif + UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); bzero(&mstat, sizeof(mstat)); diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c index 145f8ca6f0ad..128e59ebaa21 100644 --- a/sys/miscfs/union/union_vnops.c +++ b/sys/miscfs/union/union_vnops.c @@ -50,13 +50,25 @@ #include #include #include +#include #include -#define FIXUP(un, p) { \ - if (((un)->un_flags & UN_ULOCK) == 0) { \ - union_fixup(un, p); \ - } \ -} +#include +#include + +#include +#include +#include +#include +#include + +int uniondebug = 0; + +#if UDEBUG_ENABLED +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#else +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#endif static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); @@ -64,17 +76,15 @@ static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); -static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); -static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); -static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); @@ -94,36 +104,89 @@ static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_getpages 
__P((struct vop_getpages_args *ap)); +static int union_putpages __P((struct vop_putpages_args *ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); -static void -union_fixup(un, p) - struct union_node *un; - struct proc *p; +static __inline +struct vnode * +union_lock_upper(struct union_node *un, struct proc *p) { + struct vnode *uppervp; - vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + if ((uppervp = un->un_uppervp) != NULL) { + VREF(uppervp); + vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0")); + return(uppervp); } +static __inline +void +union_unlock_upper(struct vnode *uppervp, struct proc *p) +{ + vput(uppervp); +} + +static __inline +struct vnode * +union_lock_other(struct union_node *un, struct proc *p) +{ + struct vnode *vp; + + if (un->un_uppervp != NULL) { + vp = union_lock_upper(un, p); + } else if ((vp = un->un_lowervp) != NULL) { + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + return(vp); +} + +static __inline +void +union_unlock_other(struct vnode *vp, struct proc *p) +{ + vput(vp); +} + +/* + * union_lookup: + * + * udvp must be exclusively locked on call and will remain + * exclusively locked on return. This is the mount point + * for our filesystem. + * + * dvp Our base directory, locked and referenced. + * The passed dvp will be dereferenced and unlocked on return + * and a new dvp will be returned which is locked and + * referenced in the same variable. + * + * vpp is filled in with the result if no error occurred, + * locked and ref'd. + * + * If an error is returned, *vpp is set to NULLVP. If no + * error occurs, *vpp is returned with a reference and an + * exclusive lock. + */ + static int -union_lookup1(udvp, dvpp, vpp, cnp) +union_lookup1(udvp, pdvp, vpp, cnp) struct vnode *udvp; - struct vnode **dvpp; + struct vnode **pdvp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; + struct vnode *dvp = *pdvp; struct vnode *tdvp; - struct vnode *dvp; struct mount *mp; - dvp = *dvpp; - /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what * lookup would do by stepping back down the mount * hierarchy. */ if (cnp->cn_flags & ISDOTDOT) { while ((dvp != udvp) && (dvp->v_flag & VROOT)) { /* * Don't do the NOCROSSMOUNT check * at this level. By definition, * union fs deals with a union of * filesystems. */ tdvp = dvp; - *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; - vput(tdvp); + dvp = dvp->v_mount->mnt_vnodecovered; VREF(dvp); + vput(tdvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } + /* + * Set the return dvp to be the upperdvp parent directory. + */ + *pdvp = dvp; + + /* + * If the VOP_LOOKUP call generates an error, tdvp is invalid and no + * changes will have been made to dvp, so we are set to return. + */ + error = VOP_LOOKUP(dvp, &tdvp, cnp); - if (error) + if (error) { + UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); + *vpp = NULL; return (error); + } /* * The parent directory will have been unlocked, unless lookup - * found the last component. In which case, re-lock the node - * here to allow it to be unlocked again (phew) in union_lookup. + * found the last component or if dvp == tdvp (tdvp must be locked). + * + * We want our dvp to remain locked and ref'd. We also want tdvp + * to remain locked and ref'd.
*/ - if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); - dvp = tdvp; + if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). + * + * We use dvp as a temporary variable here, it is no longer related + * to the dvp above. However, we have to ensure that both *pdvp and + * tdvp are locked on return. */ - while (dvp != udvp && (dvp->v_type == VDIR) && - (mp = dvp->v_mountedhere)) { + + dvp = tdvp; + while ( + dvp != udvp && + (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere) + ) { + int relock_pdvp = 0; if (vfs_busy(mp, 0, 0, p)) continue; - error = VFS_ROOT(mp, &tdvp); + if (dvp == *pdvp) + relock_pdvp = 1; + vput(dvp); + dvp = NULL; + error = VFS_ROOT(mp, &dvp); + vfs_unbusy(mp, p); + + if (relock_pdvp) + vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) { - vput(dvp); + *vpp = NULL; return (error); } - - vput(dvp); - dvp = tdvp; } - *vpp = dvp; return (0); } @@ -199,8 +292,8 @@ union_lookup(ap) int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; - struct vnode *dvp = ap->a_dvp; - struct union_node *dun = VTOUNION(dvp); + struct vnode *dvp = ap->a_dvp; /* starting dir */ + struct union_node *dun = VTOUNION(dvp); /* associated union node */ struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; @@ -209,44 +302,38 @@ union_lookup(ap) int iswhiteout; struct vattr va; + *ap->a_vpp = NULLVP; /* * Disallow write attemps to the filesystem mounted read-only. */ - if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + if ((cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { return (EROFS); - -#ifdef notyet - if (cnp->cn_namelen == 3 && - cnp->cn_nameptr[2] == '.' && - cnp->cn_nameptr[1] == '.' && - cnp->cn_nameptr[0] == '.') { - dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); - if (dvp == NULLVP) - return (ENOENT); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp, 0, p); - return (0); } -#endif + /* + * For any lookups we do, always return with the parent locked. + */ cnp->cn_flags |= LOCKPARENT; - upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - VREF(upperdvp); - if (lowerdvp != NULL) - VREF(lowerdvp); - } + uerror = ENOENT; + lerror = ENOENT; + + /* + * Get a private lock on uppervp and a reference, effectively + * taking it out of the union_node's control. + * + * We must lock upperdvp while holding our lock on dvp + * to avoid a deadlock. + */ + upperdvp = union_lock_upper(dun, p); /* * do the lookup in the upper level. * if that level consumes additional pathnames, * then assume that something special is going * on and just return that vnode. */ if (upperdvp != NULLVP) { - FIXUP(dun, p); /* - * If we're doing `..'
in the underlying filesystem, - * we must drop our lock on the union node before - * going up the tree in the lower file system--if we block - * on the lowervp lock, and that's held by someone else - * coming down the tree and who's waiting for our lock, - * we would be hosed. + * We do not have to worry about the DOTDOT case; we've + * already unlocked dvp. */ - if (cnp->cn_flags & ISDOTDOT) { - /* retain lock on underlying VP: */ - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(dvp, 0, p); - } - uerror = union_lookup1(um->um_uppervp, &upperdvp, - &uppervp, cnp); + UDEBUG(("A %p\n", upperdvp)); + + /* + * Do the lookup. We must supply a locked and referenced + * upperdvp to the function and will get a new locked and + * referenced upperdvp back with the old having been + * dereferenced. + * + * If an error is returned, uppervp will be NULLVP. If no + * error occurs, uppervp will be the locked and referenced + * return vnode or possibly NULL, depending on what is being + * requested. It is possible that the returned uppervp + * will be the same as upperdvp. + */ + uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); + UDEBUG(( + "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", + uerror, + upperdvp, + upperdvp->v_usecount, + VOP_ISLOCKED(upperdvp), + uppervp, + (uppervp ? uppervp->v_usecount : -99), + (uppervp ? VOP_ISLOCKED(uppervp) : -99) + )); + /* * Disallow write attemps to the filesystem mounted read-only. */ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - return (EROFS); - } - - if (cnp->cn_flags & ISDOTDOT) { - if (dun->un_uppervp == upperdvp) { - /* - * We got the underlying bugger back locked... - * now take back the union node lock. Since we - * hold the uppervp lock, we can diddle union - * locking flags at will. :) - */ - dun->un_flags |= UN_ULOCK; - } - /* - * If upperdvp got swapped out, it means we did - * some mount point magic, and we do not have - * dun->un_uppervp locked currently--so we get it - * locked here (don't set the UN_ULOCK flag). - */ - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - } - - /*if (uppervp == upperdvp) - dun->un_flags |= UN_KLOCK;*/ - - if (cnp->cn_consume != 0) { - *ap->a_vpp = uppervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = uerror; + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + error = EROFS; goto out; } + + /* + * Special case. If cn_consume != 0 skip out. The result + * of the lookup is transferred to our return variable. If + * an error occurred we have to throw away the results. + */ + + if (cnp->cn_consume != 0) { + if ((error = uerror) == 0) { + *ap->a_vpp = uppervp; + uppervp = NULL; + } + goto out; + } + + /* + * Calculate whiteout, fall through + */ + if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; @@ -321,8 +410,6 @@ union_lookup(ap) iswhiteout = 1; } } - } else { - uerror = ENOENT; } /* @@ -332,13 +419,14 @@ union_lookup(ap) * back from the upper layer and return the lower vnode * instead. */ + if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("B %p\n", lowerdvp)); /* - * Only do a LOOKUP on the bottom node, since + * Force only LOOKUPs on the lower node, since * we won't be making changes to it anyway.
*/ nameiop = cnp->cn_nameiop; @@ -347,42 +435,42 @@ union_lookup(ap) saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } + /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. + * + * union_lookup1() requires lowerdvp to be locked on entry, + * and it will be unlocked on return. The ref count will + * not change. On return lowerdvp doesn't represent anything + * to us so we NULL it out. */ - lerror = union_lookup1(um->um_lowervp, &lowerdvp, - &lowervp, cnp); + VREF(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); + if (lowerdvp == lowervp) + vrele(lowerdvp); + else + vput(lowerdvp); + lowerdvp = NULL; /* lowerdvp invalid after vput */ + if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; - if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp, 0, p); - if (cnp->cn_consume != 0 || lerror == EACCES) { - if (lerror == EACCES) - lowervp = NULLVP; - if (uppervp != NULLVP) { - if (uppervp == upperdvp) - vrele(uppervp); - else - vput(uppervp); - uppervp = NULLVP; + if ((error = lerror) == 0) { + *ap->a_vpp = lowervp; + lowervp = NULL; } - *ap->a_vpp = lowervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = lerror; goto out; } } else { - lerror = ENOENT; + UDEBUG(("C %p\n", lowerdvp)); if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { - lowervp = LOWERVP(dun->un_pvp); - if (lowervp != NULLVP) { + if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; @@ -390,35 +478,27 @@ union_lookup(ap) } } - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - /* - * at this point, we have uerror and lerror indicating - * possible errors with the lookups in the upper and lower - * layers. additionally, uppervp and lowervp are (locked) - * references to existing vnodes in the upper and lower layers. + * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. * - * there are now three cases to consider. - * 1. if both layers returned an error, then return whatever - * error the upper layer generated. + * 1. If both layers returned an error, select the upper layer. * - * 2. if the top layer failed and the bottom layer succeeded - * then two subcases occur. - * a. the bottom vnode is not a directory, in which - * case just return a new union vnode referencing - * an empty top layer and the existing bottom layer. - * b. the bottom vnode is a directory, in which case - * create a new directory in the top-level and - * continue as in case 3. + * 2. If the upper layer failed and the bottom layer succeeded, + * two subcases occur: * - * 3. if the top layer succeeded then return a new union + * a. The bottom vnode is not a directory, in which case + * just return a new union vnode referencing an + * empty top layer and the existing bottom layer. + * + * b. The bottom vnode is a directory, in which case + * create a new directory in the top layer + * and fall through to case 3. + * + * 3. If the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ - *ap->a_vpp = NULLVP; - /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; @@ -428,59 +508,126 @@ union_lookup(ap) /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b.
*/ - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp, 0, p); + KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); + /* + * oops, uppervp has a problem; we may have to shadow. + */ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); - dun->un_flags |= UN_ULOCK; - if (uerror) { - if (lowervp != NULLVP) { - vput(lowervp); - lowervp = NULLVP; - } error = uerror; goto out; } } } - if (lowervp != NULLVP) + /* + * Must call union_allocvp with both the upper and lower vnodes + * referenced and the upper vnode locked. ap->a_vpp is returned + * referenced and locked. lowervp, uppervp, and upperdvp are + * absorbed by union_allocvp() whether it succeeds or fails. + * + * upperdvp is the parent directory of uppervp which may be + * different, depending on the path, from dun->un_uppervp. That's + * why it is a separate argument. Note that it must be unlocked. + * + * dvp must be locked on entry to the call and will be locked on + * return. + */ + + if (uppervp && uppervp != upperdvp) + VOP_UNLOCK(uppervp, 0, p); + if (lowervp) VOP_UNLOCK(lowervp, 0, p); + if (upperdvp) + VOP_UNLOCK(upperdvp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); - if (error) { - if (uppervp != NULLVP) - vput(uppervp); - if (lowervp != NULLVP) - vrele(lowervp); - } else { - if (*ap->a_vpp != dvp) - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); -#ifdef DIAGNOSTIC - if (cnp->cn_namelen == 1 && - cnp->cn_nameptr[0] == '.' && - *ap->a_vpp != dvp) { - panic("union_lookup returning . (%p) not same as startdir (%p)", - ap->a_vpp, dvp); - } -#endif - } + UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99)); + + uppervp = NULL; + upperdvp = NULL; + lowervp = NULL; + + /* + * Termination Code + * + * - put away any extra junk lying around. Note that lowervp + * (if not NULL) will never be the same as *ap->a_vpp and + * neither will uppervp, because when we set that state we + * NULL-out lowervp or uppervp. On the other hand, upperdvp + * may match uppervp or *ap->a_vpp. + * + * - relock/unlock dvp if appropriate. + */ out: - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - vrele(upperdvp); - if (lowerdvp != NULL) - vrele(lowerdvp); - } + if (upperdvp) { + if (upperdvp == uppervp || upperdvp == *ap->a_vpp) + vrele(upperdvp); + else + vput(upperdvp); + } + + if (uppervp) + vput(uppervp); + + if (lowervp) + vput(lowervp); + + /* + * Restore LOCKPARENT state + */ + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, + ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99), + lowervp, uppervp)); + + /* + * dvp lock state: determine whether to relock dvp. dvp is expected + * to be locked on return if: + * + * - there was an error (other than EJUSTRETURN), or + * - we hit the last component and lockparent is true + * + * dvp_is_locked is the current state of the dvp lock, not counting + * the possibility that *ap->a_vpp == dvp (in which case it is locked + * anyway). Note that *ap->a_vpp == dvp only if no error occurred. + */ + + if (*ap->a_vpp != dvp) { + if ((error == 0 || error == EJUSTRETURN) && + (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { + VOP_UNLOCK(dvp, 0, p); + } + } + + /* + * Diagnostics + */ + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . 
(%p) not same as startdir (%p)", ap->a_vpp, dvp); + } +#endif return (error); } +/* + * union_create: + * + * a_dvp is locked on entry and remains locked on return. a_vpp is returned + * locked if no error occurs; otherwise it is garbage. + */ + static int union_create(ap) struct vop_create_args /* { @@ -491,36 +638,27 @@ union_create(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULL) { struct vnode *vp; struct mount *mp; - int error; - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + if (error == 0) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, - NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } static int @@ -533,15 +671,23 @@ union_whiteout(ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + int error = EOPNOTSUPP; - if (un->un_uppervp == NULLVP) - return (EOPNOTSUPP); - - FIXUP(un, p); - return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); + if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) { + error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); + union_unlock_upper(uppervp, cnp->cn_proc); + } + return(error); } +/* + * union_mknod: + * + * a_dvp is locked on entry and should remain locked on return. + * a_vpp is garbage whether an error occurs or not. + */ + static int union_mknod(ap) struct vop_mknod_args /* { @@ -552,42 +698,28 @@ union_mknod(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) { struct vnode *vp; - struct mount *mp; - int error; - - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); - } - - if (vp != NULLVP) { - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, - cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - } else { - dun->un_flags |= UN_ULOCK; - } - return (error); + /* vp is garbage whether an error occurs or not */ + union_unlock_upper(dvp, cnp->cn_proc); } - - return (EROFS); + return (error); } +/* + * union_open: + * + * Run the open VOP. When opening the underlying vnode we have to mimic + * vn_open. What we *really* need to do to avoid screwups if the + * open semantics change is to call vn_open(). For example, ufs blows + * up if you open a file but do not vmio it prior to writing.
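+ * + * The user-visible effect is copy-on-open for writes (an illustrative sketch, not part of the original comment): an open(2) with FWRITE of a regular file that exists only in the lower layer first runs union_copyup() to the upper layer and then opens the new uppervp, as coded below.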
+ */ + static int union_open(ap) struct vop_open_args /* { @@ -603,13 +735,18 @@ union_open(ap) int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; - int error; + int error = 0; + int tvpisupper = 1; /* * If there is an existing upper vp then simply open that. + * The upper vp takes precedence over the lower vp. When opening + * a lower vp for writing copy it to the uppervp and then open the + * uppervp. + * + * At the end of this section tvp will be left locked. */ - tvp = un->un_uppervp; - if (tvp == NULLVP) { + if ((tvp = union_lock_upper(un, p)) == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { - error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); - if (error == 0) - error = VOP_OPEN(un->un_uppervp, mode, cred, p); - return (error); + int docopy = !(mode & O_TRUNC); + error = union_copyup(un, docopy, cred, p); + tvp = union_lock_upper(un, p); + } else { + un->un_openl++; + VREF(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + tvpisupper = 0; } - - /* - * Just open the lower vnode - */ - un->un_openl++; - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp, 0, p); - - return (error); } - FIXUP(un, p); + /* + * We are holding the correct vnode; open it + */ - error = VOP_OPEN(tvp, mode, cred, p); + if (error == 0) + error = VOP_OPEN(tvp, mode, cred, p); + /* + * Absolutely necessary or UFS will blow up + */ + if (error == 0 && vn_canvmio(tvp) == TRUE) { + error = vfs_object_create(tvp, p, cred); + } + + /* + * Release any locks held + */ + if (tvpisupper) { + if (tvp) + union_unlock_upper(tvp, p); + } else { + vput(tvp); + } return (error); } +/* + * union_close: + * + * It is unclear whether a_vp is passed locked or unlocked. Whatever + * the case, we do not change it. + */ + static int union_close(ap) struct vop_close_args /* { @@ -661,7 +818,6 @@ union_close(ap) --un->un_openl; vp = un->un_lowervp; } - ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } @@ -688,12 +844,12 @@ union_access(ap) struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; - struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. */ - if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + if ((ap->a_mode & VWRITE) && + (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: @@ -703,19 +859,30 @@ union_access(ap) break; } } - if ((vp = un->un_uppervp) != NULLVP) { - FIXUP(un, p); + + if ((vp = union_lock_upper(un, p)) != NULLVP) { ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_access), ap)); + error = VCALL(vp, VOFFSET(vop_access), ap); + union_unlock_upper(vp, p); + return(error); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - savedvp = ap->a_vp; ap->a_vp = vp; + + /* + * Remove VWRITE from a_mode if our mount point is RW, because + * we want to allow writes and lowervp may be read-only.
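+ * + * For example (illustrative): with a writable union mounted over a read-only lower layer such as a CD-ROM, access(path, W_OK) on a lower-only file should succeed, since an actual write would be satisfied by a copyup into the writable upper layer rather than by writing the lower vnode.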
+ */ + if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) + ap->a_mode &= ~VWRITE; + error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { - struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); + struct union_mount *um; + + um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; @@ -723,17 +890,26 @@ union_access(ap) } } VOP_UNLOCK(vp, 0, p); - if (error) - return (error); } - - return (error); + return(error); } /* * We handle getattr only to change the fsid and * track object sizes + * + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * + * Because of this we cannot use our normal locking functions + * if we do not intend to lock the main a_vp node. At the moment + * we are running without any specific locking at all, but any + * programmer should beware that care must be taken if locking is + * added to this function. */ + static int union_getattr(ap) struct vop_getattr_args /* { @@ -745,12 +921,10 @@ union_getattr(ap) { int error; struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *vp = un->un_uppervp; - struct proc *p = ap->a_p; + struct vnode *vp; struct vattr *vap; struct vattr va; - /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. @@ -762,22 +936,11 @@ union_getattr(ap) vap = ap->a_vap; - vp = un->un_uppervp; - if (vp != NULLVP) { - /* - * It's not clear whether VOP_GETATTR is to be - * called with the vnode locked or not. stat() calls - * it with (vp) locked, and fstat calls it with - * (vp) unlocked. - * In the mean time, compensate here by checking - * the union_node's lock flag. - */ - if (un->un_flags & UN_LOCKED) - FIXUP(un, p); - + if ((vp = un->un_uppervp) != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, vap->va_size, VNOVAL); } @@ -794,12 +957,12 @@ union_getattr(ap) error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; - return (0); } @@ -815,27 +978,28 @@ union_setattr(ap) struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; + struct vnode *uppervp; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && - (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_mode != (mode_t)VNOVAL)) { return (EROFS); + } /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT.
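* * For example (illustrative): open(2) with O_WRONLY|O_TRUNC on a file that exists only in the lower layer arrives here as a VOP_SETATTR with va_size == 0, so the union_copyup() call below is told not to copy any data; it merely creates the empty upper object.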
*/ - if ((un->un_uppervp == NULLVP) && - /* assert(un->un_lowervp != NULLVP) */ - (un->un_lowervp->v_type == VREG)) { + if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_p); + ap->a_cred, ap->a_p); if (error) return (error); } @@ -844,19 +1008,45 @@ union_setattr(ap) * Try to set attributes in upper layer, * otherwise return read-only filesystem error. */ - if (un->un_uppervp != NULLVP) { - FIXUP(un, p); + error = EROFS; + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); - } else { - error = EROFS; + union_unlock_upper(uppervp, p); } - return (error); } +/* + * union_getpages: + */ + +static int +union_getpages(struct vop_getpages_args *ap) +{ + int r; + + r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, ap->a_reqpage); + return(r); } + +/* + * union_putpages: + */ + +static int +union_putpages(struct vop_putpages_args *ap) +{ + int r; + + r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + return(r); +} + static int union_read(ap) struct vop_read_args /* { @@ -866,18 +1056,19 @@ union_read(ap) struct ucred *a_cred; } */ *ap; { - int error; + struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *uvp; + int error; - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); - if (dolock) - VOP_UNLOCK(vp, 0, p); + uvp = union_lock_other(un, p); + KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + + if (ap->a_vp->v_flag & VOBJBUF) + union_vm_coherency(ap->a_vp, ap->a_uio, 0); + + error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + union_unlock_other(uvp, p); /* * XXX @@ -889,7 +1080,7 @@ union_read(ap) struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; - if (vp == un->un_uppervp) { + if (uvp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { @@ -897,7 +1088,6 @@ union_read(ap) union_newsize(ap->a_vp, VNOVAL, cur); } } - return (error); } @@ -910,17 +1100,36 @@ union_write(ap) struct ucred *a_cred; } */ *ap; { - int error; - struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; + struct vnode *uppervp; + int error; - vp = UPPERVP(ap->a_vp); - if (vp == NULLVP) + if ((uppervp = union_lock_upper(un, p)) == NULLVP) panic("union: missing upper layer in write"); - FIXUP(un, p); - error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + /* + * Since our VM pages are associated with our vnode rather than + * the real vnode, and since we do not run our reads and writes + * through our own VM cache, we have a VM/VFS coherency problem. + * We solve it by invalidating or flushing the associated VM + * pages prior to allowing a normal read or write to occur. + * + * VM-backed writes (UIO_NOCOPY) have to be converted to normal + * writes because we are not cache-coherent. Normal writes need + * to be made coherent with our VM-backing store, which we do by + * first flushing any dirty VM pages associated with the write + * range, and then destroying any clean VM pages associated with + * the write range.
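+ * + * Worked example (illustrative numbers only, 4K pages): a 100 byte write at offset 8292 gives pgoff = 100 and pstart = 2 in union_vm_coherency(), so pend = 2 + (100 + 100 + 4095) / 4096 = 3; exactly page 2, the only page the write touches, is cleaned and then removed.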
+ */ + + if (ap->a_uio->uio_segflg == UIO_NOCOPY) { + ap->a_uio->uio_segflg = UIO_SYSSPACE; + } else if (ap->a_vp->v_flag & VOBJBUF) { + union_vm_coherency(ap->a_vp, ap->a_uio, 1); + } + + error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the @@ -932,7 +1141,7 @@ union_write(ap) if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } - + union_unlock_upper(uppervp, p); return (error); } @@ -945,7 +1154,7 @@ union_lease(ap) int a_flag; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); @@ -962,7 +1171,7 @@ union_ioctl(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); @@ -977,7 +1186,7 @@ union_poll(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); @@ -1010,7 +1219,7 @@ union_mmap(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); @@ -1027,35 +1236,24 @@ union_fsync(ap) { int error = 0; struct proc *p = ap->a_p; - struct vnode *targetvp = OTHERVP(ap->a_vp); - struct union_node *un; - - if (targetvp != NULLVP) { - int dolock = (targetvp == LOWERVP(ap->a_vp)); - - un = VTOUNION(ap->a_vp); - if (dolock) - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - else { - un = VTOUNION(ap->a_vp); - if ((un->un_flags & UN_ULOCK) == 0 && - targetvp->v_data != NULL && - ((struct lock *)targetvp->v_data)->lk_lockholder - == curproc->p_pid && - VOP_ISLOCKED(targetvp) != 0) - return 0; /* XXX */ - - FIXUP(un, p); - } + struct vnode *targetvp; + struct union_node *un = VTOUNION(ap->a_vp); + if ((targetvp = union_lock_other(un, p)) != NULLVP) { error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); - if (dolock) - VOP_UNLOCK(targetvp, 0, p); + union_unlock_other(targetvp, p); } return (error); } +/* + * union_remove: + * + * Remove the specified cnp. The dvp and vp are passed to us locked + * and must remain locked on return. 
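+ * + * Sketch of the two cases handled below (illustrative): if uppervp exists, VOP_REMOVE runs on the upper layer, with DOWHITEOUT set when a lower copy must stay masked; if the name exists only in the lower layer, which cannot be modified, only a whiteout is created.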
+ */ + static int union_remove(ap) struct vop_remove_args /* { @@ -1068,42 +1266,40 @@ union_remove(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + struct vnode *upperdvp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union remove: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_REMOVE(dvp, vp, cnp); + error = VOP_REMOVE(upperdvp, uppervp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + upperdvp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_link: + * + * tdvp will be locked on entry, vp will not be locked on entry. + * tdvp should remain locked on return and vp should remain unlocked + * on return. + */ + static int union_link(ap) struct vop_link_args /* { @@ -1119,43 +1315,56 @@ union_link(ap) struct vnode *tdvp; int error = 0; - if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); + if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(dun->un_uppervp, 0, p); + if (dun->un_flags & UN_ULOCK) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } } +#endif error = union_copyup(tun, 1, cnp->cn_cred, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { vn_lock(dun->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, p); + LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; } +#endif VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = dun->un_uppervp; - if (tdvp == NULLVP) - error = EROFS; - if (error) return (error); - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_tdvp, 0, p); + /* + * Make sure upper is locked, then unlock the union directory we were + * called with to avoid a deadlock while we are calling VOP_LINK on + * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp + * is expected to be locked on return. + */ - error = VOP_LINK(tdvp, vp, cnp); + if ((tdvp = union_lock_upper(dun, p)) == NULLVP) + return (EROFS); - dun->un_flags |= UN_ULOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); /* unlock calling node */ + error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + /* + * We have to unlock tdvp prior to relocking our calling node in + * order to avoid a deadlock. + */ + union_unlock_upper(tdvp, p); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } @@ -1171,12 +1380,16 @@ union_rename(ap) } */ *ap; { int error; - struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; + /* + * Figure out what fdvp to pass to our upper or lower vnode. If we + * replace the fdvp, release the original one and ref the new one. 
+ */ + if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { @@ -1189,30 +1402,77 @@ union_rename(ap) error = EXDEV; goto bad; } - fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } + /* + * Figure out what fvp to pass to our upper or lower vnode. If we + * replace the fvp, release the original one and ref the new one. + */ + if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); +#if 0 + struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); +#endif + if (un->un_uppervp == NULLVP) { - /* XXX: should do a copyup */ - error = EXDEV; - goto bad; + switch(fvp->v_type) { + case VREG: + vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc); + VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; + case VDIR: + /* + * XXX not yet. + * + * There is only one way to rename a directory + * based in the lowervp, and that is to copy + * the entire directory hierarchy. Otherwise + * it would not last across a reboot. + */ +#if 0 + vrele(fvp); + fvp = NULL; + vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_mkshadow(um, fdvp, + ap->a_fcnp, &un->un_uppervp); + VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc); + if (un->un_uppervp) + VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; +#endif + default: + error = EXDEV; + goto bad; + } } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; - fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } + /* + * Figure out what tdvp (destination directory) to pass to the + * lower level. If we replace it with uppervp, we need to vput the + * old one. The exclusive lock is transferred to what we will pass + * down in the VOP_RENAME and we replace uppervp with a simple + * reference. + */ + if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); + if (un->un_uppervp == NULLVP) { /* * this should never happen in normal @@ -1224,32 +1484,52 @@ union_rename(ap) goto bad; } - tdvp = un->un_uppervp; - VREF(tdvp); - un->un_flags |= UN_KLOCK; + /* + * new tdvp is a lock and reference on uppervp; put away + * the old tdvp. + */ + tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tdvp); } + /* + * Figure out what tvp (destination file) to pass to the + * lower level. + * + * If the uppervp file does not exist, put away the (wrong) + * file and change tvp to NULL. + */ + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); - tvp = un->un_uppervp; - if (tvp != NULLVP) { - VREF(tvp); - un->un_flags |= UN_KLOCK; - } + tvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tvp); + /* note: tvp may be NULL */ } + /* + * VOP_RENAME releases/vputs prior to returning, so we have no + * cleanup to do. + */ + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + /* + * Error. We still have to release / vput the various elements.
+ */ + bad: vrele(fdvp); - vrele(fvp); + if (fvp) + vrele(fvp); vput(tdvp); - if (tvp != NULLVP) - vput(tvp); - + if (tvp != NULLVP) { + if (tvp != tdvp) + vput(tvp); + else + vrele(tvp); + } return (error); } @@ -1263,34 +1543,26 @@ union_mkdir(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); + union_unlock_upper(upperdvp, p); + + if (error == 0) { + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, + ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, - NULLVP, cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - - return (error); } - - return (EROFS); + return (error); } static int @@ -1305,42 +1577,34 @@ union_rmdir(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + struct vnode *uppervp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union rmdir: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_RMDIR(dvp, vp, ap->a_cnp); -#if 0 - /* XXX */ - if (!error) - union_removed_upper(un); -#endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_symlink: + * + * dvp is locked on entry and remains locked on return. a_vpp is garbage + * (unused). 
+ */ + static int union_symlink(ap) struct vop_symlink_args /* { @@ -1352,24 +1616,20 @@ union_symlink(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); - dun->un_flags |= UN_ULOCK; + /* vp is garbage whether an error occurs or not */ *ap->a_vpp = NULLVP; - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } /* @@ -1391,15 +1651,16 @@ union_readdir(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error = 0; - if (uvp == NULLVP) - return (0); - - FIXUP(un, p); - ap->a_vp = uvp; - return (VCALL(uvp, VOFFSET(vop_readdir), ap)); + if ((uvp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = uvp; + error = VCALL(uvp, VOFFSET(vop_readdir), ap); + union_unlock_upper(uvp, p); + } + return(error); } static int @@ -1411,23 +1672,28 @@ union_readlink(ap) } */ *ap; { int error; + struct union_node *un = VTOUNION(ap->a_vp); struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_readlink: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } +/* + * union_abortop: + * + * dvp is locked on entry and left locked on return + * + */ + static int union_abortop(ap) struct vop_abortop_args /* { @@ -1435,28 +1701,35 @@ union_abortop(ap) struct componentname *a_cnp; } */ *ap; { - int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); - int islocked = un->un_flags & UN_LOCKED; - int dolock = (vp == LOWERVP(ap->a_dvp)); + int islocked = VOP_ISLOCKED(ap->a_dvp); + struct vnode *vp; + int error; if (islocked) { - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_dvp), p); + vp = union_lock_other(un, p); + } else { + vp = OTHERVP(ap->a_dvp); } + KASSERT(vp != NULL, ("union_abortop: backing vnode missing!")); + ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); - if (islocked && dolock) - VOP_UNLOCK(vp, 0, p); + + if (islocked) + union_unlock_other(vp, p); return (error); } +/* + * union_inactive: + * + * Called with the vnode locked. We are expected to unlock the vnode. 
+ */ + static int union_inactive(ap) struct vop_inactive_args /* { @@ -1485,10 +1758,17 @@ union_inactive(ap) if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); - free(un->un_dircache, M_TEMP); + free(un->un_dircache, M_TEMP); un->un_dircache = 0; } +#if 0 + if ((un->un_flags & UN_ULOCK) && un->un_uppervp) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +#endif + VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) @@ -1503,7 +1783,6 @@ union_reclaim(ap) struct vnode *a_vp; } */ *ap; { - union_freevp(ap->a_vp); return (0); @@ -1513,75 +1792,47 @@ static int union_lock(ap) struct vop_lock_args *ap; { +#if 0 struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; +#endif int error; - vop_nolock(ap); - /* - * Need to do real lockmgr-style locking here. - * in the mean time, draining won't work quite right, - * which could lead to a few race conditions. - * the following test was here, but is not quite right, we - * still need to take the lock: if ((flags & LK_TYPE_MASK) == LK_DRAIN) return (0); - */ - flags &= ~LK_INTERLOCK; - -start: + error = vop_stdlock(ap); +#if 0 un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) { - if (((un->un_flags & UN_ULOCK) == 0) && - (vp->v_usecount != 0)) { - error = vn_lock(un->un_uppervp, flags, p); - if (error) - return (error); - un->un_flags |= UN_ULOCK; + if (error == 0) { + /* + * Lock the upper if it exists and this is an exclusive lock + * request. + */ + if (un->un_uppervp != NULLVP && + (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) { + if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) { + struct vop_unlock_args uap = { 0 }; + uap.a_vp = ap->a_vp; + uap.a_flags = ap->a_flags; + uap.a_p = ap->a_p; + vop_stdunlock(&uap); + return (error); + } + un->un_flags |= UN_ULOCK; + } } -#ifdef DIAGNOSTIC - if (un->un_flags & UN_KLOCK) { - vprint("dangling upper lock", vp); - panic("union: dangling upper lock"); - } -#endif } - - if (un->un_flags & UN_LOCKED) { -#ifdef DIAGNOSTIC - if (curproc && un->un_pid == curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: locking against myself"); #endif - un->un_flags |= UN_WANT; - tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); - goto start; - } - -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif - - un->un_flags |= UN_LOCKED; - return (0); + return (error); } /* - * When operations want to vput() a union node yet retain a lock on - * the upper vnode (say, to do some further operations like link(), - * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() - * unlocks the union node (leaving the upper vnode alone), clears the - * KLOCK flag, and then returns to vput(). The caller then does whatever - * is left to do with the upper vnode, and ensures that it gets unlocked. + * union_unlock: * - * If UN_KLOCK isn't set, then the upper vnode is unlocked here. + * Unlock our union node. Any uppervp lock is taken and released + * separately via union_lock_upper()/union_unlock_upper().
 */
 static int
 union_unlock(ap)
@@ -1592,36 +1843,38 @@ union_unlock(ap)
 	} */ *ap;
 {
 	struct union_node *un = VTOUNION(ap->a_vp);
-	struct proc *p = ap->a_p;
+	int error;
 
-#ifdef DIAGNOSTIC
-	if ((un->un_flags & UN_LOCKED) == 0)
-		panic("union: unlock unlocked node");
-	if (curproc && un->un_pid != curproc->p_pid &&
-	    curproc->p_pid > -1 && un->un_pid > -1)
-		panic("union: unlocking other process's union node");
-#endif
+	KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0"));
 
-	un->un_flags &= ~UN_LOCKED;
+	error = vop_stdunlock(ap);
+#if 0
 
-	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
-		VOP_UNLOCK(un->un_uppervp, 0, p);
+	/*
+	 * If no exclusive locks remain and we are holding an uppervp lock,
+	 * remove the uppervp lock.
+	 */
 
-	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
-
-	if (un->un_flags & UN_WANT) {
-		un->un_flags &= ~UN_WANT;
-		wakeup((caddr_t) &un->un_flags);
+	if ((un->un_flags & UN_ULOCK) &&
+	    lockstatus(&un->un_lock) != LK_EXCLUSIVE) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, ap->a_p);
 	}
-
-#ifdef DIAGNOSTIC
-	un->un_pid = 0;
 #endif
-	vop_nounlock(ap);
-
-	return (0);
+	return (error);
 }
 
+/*
+ * union_bmap:
+ *
+ *	There isn't much we can do.  We cannot push through to the real vnode
+ *	to get to the underlying device because this will bypass data
+ *	cached by the real vnode.
+ *
+ *	For some reason we cannot return the 'real' vnode either; it seems
+ *	to blow up memory maps.
+ */
+
 static int
 union_bmap(ap)
 	struct vop_bmap_args /* {
@@ -1633,21 +1886,7 @@ union_bmap(ap)
 		int *a_runb;
 	} */ *ap;
 {
-	int error;
-	struct proc *p = curproc;		/* XXX */
-	struct vnode *vp = OTHERVP(ap->a_vp);
-	int dolock = (vp == LOWERVP(ap->a_vp));
-
-	if (dolock)
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	else
-		FIXUP(VTOUNION(ap->a_vp), p);
-	ap->a_vp = vp;
-	error = VCALL(vp, VOFFSET(vop_bmap), ap);
-	if (dolock)
-		VOP_UNLOCK(vp, 0, p);
-
-	return (error);
+	return (EOPNOTSUPP);
 }
 
 static int
@@ -1668,16 +1907,6 @@ union_print(ap)
 	return (0);
 }
 
-static int
-union_islocked(ap)
-	struct vop_islocked_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
-{
-
-	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0);
-}
-
 static int
 union_pathconf(ap)
 	struct vop_pathconf_args /* {
@@ -1688,17 +1917,15 @@ union_pathconf(ap)
 {
 	int error;
 	struct proc *p = curproc;		/* XXX */
-	struct vnode *vp = OTHERVP(ap->a_vp);
-	int dolock = (vp == LOWERVP(ap->a_vp));
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp;
+
+	vp = union_lock_other(un, p);
+	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
 
-	if (dolock)
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	else
-		FIXUP(VTOUNION(ap->a_vp), p);
 	ap->a_vp = vp;
 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
-	if (dolock)
-		VOP_UNLOCK(vp, 0, p);
+	union_unlock_other(vp, p);
 
 	return (error);
 }
@@ -1722,6 +1949,8 @@ union_advlock(ap)
 
 /*
  * XXX - vop_strategy must be hand coded because it has no
  * vnode in its arguments.
+ * YYY - and it is not coherent with anything
+ *
  * This goes away with a merged VM/buffer cache.
  */
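
The converted vnode ops above all bracket their VCALL()s with the new
union_lock_upper()/union_unlock_upper() and union_lock_other()/
union_unlock_other() pairs, which are presumably defined in the
union_subr.c half of this patch and are not visible in these hunks.
A minimal sketch of the contract the call sites imply; this is a
reconstruction for illustration, not the committed union_subr.c code:

/*
 * Sketch only: return the upper vnode referenced and exclusively
 * locked, or NULLVP if this node has no upper layer.  The matching
 * unlock drops both the lock and the reference, which is why
 * union_unlock() above can assert that any uppervp still has a
 * positive usecount.
 */
static struct vnode *
union_lock_upper(un, p)
	struct union_node *un;
	struct proc *p;
{
	struct vnode *uppervp;

	if ((uppervp = un->un_uppervp) != NULLVP) {
		VREF(uppervp);
		vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY, p);
	}
	return (uppervp);
}

static void
union_unlock_upper(uppervp, p)
	struct vnode *uppervp;
	struct proc *p;
{
	vput(uppervp);	/* releases the lock and the reference together */
}

The _other variants follow the same pattern for OTHERVP(), falling
back to the lower vnode when no upper layer exists, which is why the
callers can KASSERT a non-NULL return.
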
@@ -1742,7 +1971,6 @@ union_strategy(ap)
 		   (othervp == LOWERVP(bp->b_vp)))
 			panic("union_strategy: writing to lowervp");
 #endif
-
 	return (VOP_STRATEGY(othervp, bp));
 }
 
@@ -1759,10 +1987,12 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
 	{ &vop_close_desc,		(vop_t *) union_close },
 	{ &vop_create_desc,		(vop_t *) union_create },
 	{ &vop_fsync_desc,		(vop_t *) union_fsync },
+	{ &vop_getpages_desc,		(vop_t *) union_getpages },
+	{ &vop_putpages_desc,		(vop_t *) union_putpages },
 	{ &vop_getattr_desc,		(vop_t *) union_getattr },
 	{ &vop_inactive_desc,		(vop_t *) union_inactive },
 	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
-	{ &vop_islocked_desc,		(vop_t *) union_islocked },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_lease_desc,		(vop_t *) union_lease },
 	{ &vop_link_desc,		(vop_t *) union_link },
 	{ &vop_lock_desc,		(vop_t *) union_lock },
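
With union_lock() and union_unlock() above reduced to vop_stdlock()
and vop_stdunlock(), and vop_islocked routed to vop_stdislocked in
this table, the union node's own lockmgr lock (the un_lock that
union_unlock's disabled code inspects via lockstatus()) does all the
serialization; the generic routines reach it by casting v_data, so it
must sit at the start of the per-vnode private data.  A sketch of the
effective behavior, assuming the vfs_default.c generics of this
vintage; union_lock_sketch is a hypothetical name used only for
illustration:

/*
 * Sketch: what vop_stdlock() effectively does for a union vnode.
 * v_data points at the union_node, and because un_lock is its first
 * member, (struct lock *)vp->v_data == &un->un_lock.
 */
static int
union_lock_sketch(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);

	return (lockmgr(&un->un_lock, ap->a_flags,
	    &ap->a_vp->v_interlock, ap->a_p));
}

Because the same struct lock is handed to lockmgr() every time, the
exclusive/shared accounting that the old UN_LOCKED/UN_WANT flags and
tsleep()/wakeup() pairs implemented by hand now comes from lockmgr()
itself.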