freebsd-nq/sys/fs/smbfs/smbfs_io.c
Jeff Roberson ce1fb23146 - Remove interlock protection around VI_XLOCK. The interlock is not
sufficient to guarantee that this race is not hit.  The XLOCK will likely
   have to be redesigned due to the way reference counting and mutexes work
   in FreeBSD.  We currently can not be guaranteed that xlock was not set
   and cleared while we were blocked on the interlock while waiting to check
   for XLOCK.  This would lead us to reference a vnode which was not the
   vnode we requested.
 - Add a backtrace() call inside of INVARIANTS in the hopes of finding out if
   this condition is ever hit.  It should not, since we should be retaining
   a reference to the vnode in these cases.  The reference would be sufficient
   to block recycling.
2003-09-19 23:37:49 +00:00

723 lines
18 KiB
C

/*
* Copyright (c) 2000-2001, Boris Popov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Boris Popov.
* 4. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h> /* defines plimit structure in proc struct */
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
/*
#include <sys/ioccom.h>
*/
#include <netsmb/smb.h>
#include <netsmb/smb_conn.h>
#include <netsmb/smb_subr.h>
#include <fs/smbfs/smbfs.h>
#include <fs/smbfs/smbfs_node.h>
#include <fs/smbfs/smbfs_subr.h>
/*#define SMBFS_RWGENERIC*/
extern int smbfs_pbuf_freecnt;
static int smbfs_fastlookup = 1;
SYSCTL_DECL(_vfs_smbfs);
SYSCTL_INT(_vfs_smbfs, OID_AUTO, fastlookup, CTLFLAG_RW, &smbfs_fastlookup, 0, "");
#define DE_SIZE (sizeof(struct dirent))
static int
smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred)
{
struct dirent de;
struct componentname cn;
struct smb_cred scred;
struct smbfs_fctx *ctx;
struct vnode *newvp;
struct smbnode *np = VTOSMB(vp);
int error/*, *eofflag = ap->a_eofflag*/;
long offset, limit;
np = VTOSMB(vp);
SMBVDEBUG("dirname='%s'\n", np->n_name);
smb_makescred(&scred, uio->uio_td, cred);
offset = uio->uio_offset / DE_SIZE; /* offset in the directory */
limit = uio->uio_resid / DE_SIZE;
if (uio->uio_resid < DE_SIZE || uio->uio_offset < 0)
return EINVAL;
while (limit && offset < 2) {
limit--;
bzero((caddr_t)&de, DE_SIZE);
de.d_reclen = DE_SIZE;
de.d_fileno = (offset == 0) ? np->n_ino :
(np->n_parent ? VTOSMB(np->n_parent)->n_ino : 2);
if (de.d_fileno == 0)
de.d_fileno = 0x7ffffffd + offset;
de.d_namlen = offset + 1;
de.d_name[0] = '.';
de.d_name[1] = '.';
de.d_name[offset + 1] = '\0';
de.d_type = DT_DIR;
error = uiomove(&de, DE_SIZE, uio);
if (error)
return error;
offset++;
uio->uio_offset += DE_SIZE;
}
if (limit == 0)
return 0;
if (offset != np->n_dirofs || np->n_dirseq == NULL) {
SMBVDEBUG("Reopening search %ld:%ld\n", offset, np->n_dirofs);
if (np->n_dirseq) {
smbfs_findclose(np->n_dirseq, &scred);
np->n_dirseq = NULL;
}
np->n_dirofs = 2;
error = smbfs_findopen(np, "*", 1,
SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
&scred, &ctx);
if (error) {
SMBVDEBUG("can not open search, error = %d", error);
return error;
}
np->n_dirseq = ctx;
} else
ctx = np->n_dirseq;
while (np->n_dirofs < offset) {
error = smbfs_findnext(ctx, offset - np->n_dirofs++, &scred);
if (error) {
smbfs_findclose(np->n_dirseq, &scred);
np->n_dirseq = NULL;
return error == ENOENT ? 0 : error;
}
}
error = 0;
for (; limit; limit--, offset++) {
error = smbfs_findnext(ctx, limit, &scred);
if (error)
break;
np->n_dirofs++;
bzero((caddr_t)&de, DE_SIZE);
de.d_reclen = DE_SIZE;
de.d_fileno = ctx->f_attr.fa_ino;
de.d_type = (ctx->f_attr.fa_attr & SMB_FA_DIR) ? DT_DIR : DT_REG;
de.d_namlen = ctx->f_nmlen;
bcopy(ctx->f_name, de.d_name, de.d_namlen);
de.d_name[de.d_namlen] = '\0';
if (smbfs_fastlookup) {
error = smbfs_nget(vp->v_mount, vp, ctx->f_name,
ctx->f_nmlen, &ctx->f_attr, &newvp);
if (!error) {
cn.cn_nameptr = de.d_name;
cn.cn_namelen = de.d_namlen;
cache_enter(vp, newvp, &cn);
vput(newvp);
}
}
error = uiomove(&de, DE_SIZE, uio);
if (error)
break;
}
if (error == ENOENT)
error = 0;
uio->uio_offset = offset * DE_SIZE;
return error;
}
int
smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
struct smbmount *smp = VFSTOSMBFS(vp->v_mount);
struct smbnode *np = VTOSMB(vp);
struct thread *td;
struct vattr vattr;
struct smb_cred scred;
int error, lks;
/*
* Protect against method which is not supported for now
*/
if (uiop->uio_segflg == UIO_NOCOPY)
return EOPNOTSUPP;
if (vp->v_type != VREG && vp->v_type != VDIR) {
SMBFSERR("vn types other than VREG or VDIR are unsupported !\n");
return EIO;
}
if (uiop->uio_resid == 0)
return 0;
if (uiop->uio_offset < 0)
return EINVAL;
/* if (uiop->uio_offset + uiop->uio_resid > smp->nm_maxfilesize)
return EFBIG;*/
td = uiop->uio_td;
if (vp->v_type == VDIR) {
lks = LK_EXCLUSIVE;/*lockstatus(vp->v_vnlock, td);*/
if (lks == LK_SHARED)
vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
error = smbfs_readvdir(vp, uiop, cred);
if (lks == LK_SHARED)
vn_lock(vp, LK_DOWNGRADE | LK_RETRY, td);
return error;
}
/* biosize = SSTOCN(smp->sm_share)->sc_txmax;*/
if (np->n_flag & NMODIFIED) {
smbfs_attr_cacheremove(vp);
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return error;
np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
} else {
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return error;
if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) {
error = smbfs_vinvalbuf(vp, V_SAVE, cred, td, 1);
if (error)
return error;
np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
}
}
smb_makescred(&scred, td, cred);
return smb_read(smp->sm_share, np->n_fid, uiop, &scred);
}
int
smbfs_writevnode(struct vnode *vp, struct uio *uiop,
struct ucred *cred, int ioflag)
{
struct smbmount *smp = VTOSMBFS(vp);
struct smbnode *np = VTOSMB(vp);
struct smb_cred scred;
struct proc *p;
struct thread *td;
int error = 0;
if (vp->v_type != VREG) {
SMBERROR("vn types other than VREG unsupported !\n");
return EIO;
}
SMBVDEBUG("ofs=%d,resid=%d\n",(int)uiop->uio_offset, uiop->uio_resid);
if (uiop->uio_offset < 0)
return EINVAL;
/* if (uiop->uio_offset + uiop->uio_resid > smp->nm_maxfilesize)
return (EFBIG);*/
td = uiop->uio_td;
p = td->td_proc;
if (ioflag & (IO_APPEND | IO_SYNC)) {
if (np->n_flag & NMODIFIED) {
smbfs_attr_cacheremove(vp);
error = smbfs_vinvalbuf(vp, V_SAVE, cred, td, 1);
if (error)
return error;
}
if (ioflag & IO_APPEND) {
#if notyet
/*
* File size can be changed by another client
*/
smbfs_attr_cacheremove(vp);
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error) return (error);
#endif
uiop->uio_offset = np->n_size;
}
}
if (uiop->uio_resid == 0)
return 0;
if (p && uiop->uio_offset + uiop->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
PROC_LOCK(td->td_proc);
psignal(td->td_proc, SIGXFSZ);
PROC_UNLOCK(td->td_proc);
return EFBIG;
}
smb_makescred(&scred, td, cred);
error = smb_write(smp->sm_share, np->n_fid, uiop, &scred);
SMBVDEBUG("after: ofs=%d,resid=%d\n",(int)uiop->uio_offset, uiop->uio_resid);
if (!error) {
if (uiop->uio_offset > np->n_size) {
np->n_size = uiop->uio_offset;
vnode_pager_setsize(vp, np->n_size);
}
}
return error;
}
/*
* Do an I/O operation to/from a cache block.
*/
int
smbfs_doio(struct buf *bp, struct ucred *cr, struct thread *td)
{
struct vnode *vp = bp->b_vp;
struct smbmount *smp = VFSTOSMBFS(vp->v_mount);
struct smbnode *np = VTOSMB(vp);
struct uio uio, *uiop = &uio;
struct iovec io;
struct smb_cred scred;
int error = 0;
uiop->uio_iov = &io;
uiop->uio_iovcnt = 1;
uiop->uio_segflg = UIO_SYSSPACE;
uiop->uio_td = td;
smb_makescred(&scred, td, cr);
if (bp->b_iocmd == BIO_READ) {
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_rw = UIO_READ;
switch (vp->v_type) {
case VREG:
uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
error = smb_read(smp->sm_share, np->n_fid, uiop, &scred);
if (error)
break;
if (uiop->uio_resid) {
int left = uiop->uio_resid;
int nread = bp->b_bcount - left;
if (left > 0)
bzero((char *)bp->b_data + nread, left);
}
break;
default:
printf("smbfs_doio: type %x unexpected\n",vp->v_type);
break;
};
if (error) {
bp->b_error = error;
bp->b_ioflags |= BIO_ERROR;
}
} else { /* write */
if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
if (bp->b_dirtyend > bp->b_dirtyoff) {
io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
uiop->uio_rw = UIO_WRITE;
bp->b_flags |= B_WRITEINPROG;
error = smb_write(smp->sm_share, np->n_fid, uiop, &scred);
bp->b_flags &= ~B_WRITEINPROG;
/*
* For an interrupted write, the buffer is still valid
* and the write hasn't been pushed to the server yet,
* so we can't set BIO_ERROR and report the interruption
* by setting B_EINTR. For the B_ASYNC case, B_EINTR
* is not relevant, so the rpc attempt is essentially
* a noop. For the case of a V3 write rpc not being
* committed to stable storage, the block is still
* dirty and requires either a commit rpc or another
* write rpc with iomode == NFSV3WRITE_FILESYNC before
* the block is reused. This is indicated by setting
* the B_DELWRI and B_NEEDCOMMIT flags.
*/
if (error == EINTR
|| (!error && (bp->b_flags & B_NEEDCOMMIT))) {
int s;
s = splbio();
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
if ((bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
if ((bp->b_flags & B_PAGING) == 0) {
bdirty(bp);
bp->b_flags &= ~B_DONE;
}
if ((bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
splx(s);
} else {
if (error) {
bp->b_ioflags |= BIO_ERROR;
bp->b_error = error;
}
bp->b_dirtyoff = bp->b_dirtyend = 0;
}
} else {
bp->b_resid = 0;
bufdone(bp);
return 0;
}
}
bp->b_resid = uiop->uio_resid;
bufdone(bp);
return error;
}
/*
* Vnode op for VM getpages.
* Wish wish .... get rid from multiple IO routines
*/
int
smbfs_getpages(ap)
struct vop_getpages_args /* {
struct vnode *a_vp;
vm_page_t *a_m;
int a_count;
int a_reqpage;
vm_ooffset_t a_offset;
} */ *ap;
{
#ifdef SMBFS_RWGENERIC
return vop_stdgetpages(ap);
#else
int i, error, nextoff, size, toff, npages, count, reqpage;
struct uio uio;
struct iovec iov;
vm_offset_t kva;
struct buf *bp;
struct vnode *vp;
struct thread *td;
struct ucred *cred;
struct smbmount *smp;
struct smbnode *np;
struct smb_cred scred;
vm_object_t object;
vm_page_t *pages, m;
vp = ap->a_vp;
if ((object = vp->v_object) == NULL) {
printf("smbfs_getpages: called with non-merged cache vnode??\n");
return VM_PAGER_ERROR;
}
td = curthread; /* XXX */
cred = td->td_ucred; /* XXX */
np = VTOSMB(vp);
smp = VFSTOSMBFS(vp->v_mount);
pages = ap->a_m;
count = ap->a_count;
npages = btoc(count);
reqpage = ap->a_reqpage;
/*
* If the requested page is partially valid, just return it and
* allow the pager to zero-out the blanks. Partially valid pages
* can only occur at the file EOF.
*/
m = pages[reqpage];
if (m->valid != 0) {
/* handled by vm_fault now */
/* vm_page_zero_invalid(m, TRUE); */
VM_OBJECT_LOCK(object);
vm_page_lock_queues();
for (i = 0; i < npages; ++i) {
if (i != reqpage)
vm_page_free(pages[i]);
}
vm_page_unlock_queues();
VM_OBJECT_UNLOCK(object);
return 0;
}
smb_makescred(&scred, td, cred);
bp = getpbuf(&smbfs_pbuf_freecnt);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
cnt.v_vnodein++;
cnt.v_vnodepgsin += npages;
iov.iov_base = (caddr_t) kva;
iov.iov_len = count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
uio.uio_resid = count;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_rw = UIO_READ;
uio.uio_td = td;
error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
pmap_qremove(kva, npages);
relpbuf(bp, &smbfs_pbuf_freecnt);
if (error)
VM_OBJECT_LOCK(object);
if (error && (uio.uio_resid == count)) {
printf("smbfs_getpages: error %d\n",error);
vm_page_lock_queues();
for (i = 0; i < npages; i++) {
if (reqpage != i)
vm_page_free(pages[i]);
}
vm_page_unlock_queues();
VM_OBJECT_UNLOCK(object);
return VM_PAGER_ERROR;
}
size = count - uio.uio_resid;
vm_page_lock_queues();
for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
vm_page_t m;
nextoff = toff + PAGE_SIZE;
m = pages[i];
m->flags &= ~PG_ZERO;
if (nextoff <= size) {
/*
* Read operation filled an entire page
*/
m->valid = VM_PAGE_BITS_ALL;
vm_page_undirty(m);
} else if (size > toff) {
/*
* Read operation filled a partial page.
*/
m->valid = 0;
vm_page_set_validclean(m, 0, size - toff);
/* handled by vm_fault now */
/* vm_page_zero_invalid(m, TRUE); */
} else {
/*
* Read operation was short. If no error occured
* we may have hit a zero-fill section. We simply
* leave valid set to 0.
*/
;
}
if (i != reqpage) {
/*
* Whether or not to leave the page activated is up in
* the air, but we should put the page on a page queue
* somewhere (it already is in the object). Result:
* It appears that emperical results show that
* deactivating pages is best.
*/
/*
* Just in case someone was asking for this page we
* now tell them that it is ok to use.
*/
if (!error) {
if (m->flags & PG_WANTED)
vm_page_activate(m);
else
vm_page_deactivate(m);
vm_page_wakeup(m);
} else {
vm_page_free(m);
}
}
}
vm_page_unlock_queues();
if (error)
VM_OBJECT_UNLOCK(object);
return 0;
#endif /* SMBFS_RWGENERIC */
}
/*
* Vnode op for VM putpages.
* possible bug: all IO done in sync mode
* Note that vop_close always invalidate pages before close, so it's
* not necessary to open vnode.
*/
int
smbfs_putpages(ap)
struct vop_putpages_args /* {
struct vnode *a_vp;
vm_page_t *a_m;
int a_count;
int a_sync;
int *a_rtvals;
vm_ooffset_t a_offset;
} */ *ap;
{
int error;
struct vnode *vp = ap->a_vp;
struct thread *td;
struct ucred *cred;
#ifdef SMBFS_RWGENERIC
td = curthread; /* XXX */
cred = td->td_ucred; /* XXX */
VOP_OPEN(vp, FWRITE, cred, td, -1);
error = vop_stdputpages(ap);
VOP_CLOSE(vp, FWRITE, cred, td);
return error;
#else
struct uio uio;
struct iovec iov;
vm_offset_t kva;
struct buf *bp;
int i, npages, count;
int *rtvals;
struct smbmount *smp;
struct smbnode *np;
struct smb_cred scred;
vm_page_t *pages;
td = curthread; /* XXX */
cred = td->td_ucred; /* XXX */
/* VOP_OPEN(vp, FWRITE, cred, td, -1);*/
np = VTOSMB(vp);
smp = VFSTOSMBFS(vp->v_mount);
pages = ap->a_m;
count = ap->a_count;
rtvals = ap->a_rtvals;
npages = btoc(count);
for (i = 0; i < npages; i++) {
rtvals[i] = VM_PAGER_AGAIN;
}
bp = getpbuf(&smbfs_pbuf_freecnt);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
cnt.v_vnodeout++;
cnt.v_vnodepgsout += count;
iov.iov_base = (caddr_t) kva;
iov.iov_len = count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
uio.uio_resid = count;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_rw = UIO_WRITE;
uio.uio_td = td;
SMBVDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);
smb_makescred(&scred, td, cred);
error = smb_write(smp->sm_share, np->n_fid, &uio, &scred);
/* VOP_CLOSE(vp, FWRITE, cred, td);*/
SMBVDEBUG("paged write done: %d\n", error);
pmap_qremove(kva, npages);
relpbuf(bp, &smbfs_pbuf_freecnt);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
vm_page_lock_queues();
for (i = 0; i < nwritten; i++) {
rtvals[i] = VM_PAGER_OK;
vm_page_undirty(pages[i]);
}
vm_page_unlock_queues();
}
return rtvals[0];
#endif /* SMBFS_RWGENERIC */
}
/*
* Flush and invalidate all dirty buffers. If another process is already
* doing the flush, just wait for completion.
*/
int
smbfs_vinvalbuf(vp, flags, cred, td, intrflg)
struct vnode *vp;
int flags;
struct ucred *cred;
struct thread *td;
int intrflg;
{
struct smbnode *np = VTOSMB(vp);
int error = 0, slpflag, slptimeo;
if (vp->v_iflag & VI_XLOCK) {
#ifdef INVARIANTS
backtrace();
#endif
return 0;
}
if (intrflg) {
slpflag = PCATCH;
slptimeo = 2 * hz;
} else {
slpflag = 0;
slptimeo = 0;
}
while (np->n_flag & NFLUSHINPROG) {
np->n_flag |= NFLUSHWANT;
error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", slptimeo);
error = smb_td_intr(td);
if (error == EINTR && intrflg)
return EINTR;
}
np->n_flag |= NFLUSHINPROG;
error = vinvalbuf(vp, flags, cred, td, slpflag, 0);
while (error) {
if (intrflg && (error == ERESTART || error == EINTR)) {
np->n_flag &= ~NFLUSHINPROG;
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
wakeup(&np->n_flag);
}
return EINTR;
}
error = vinvalbuf(vp, flags, cred, td, slpflag, 0);
}
np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
wakeup(&np->n_flag);
}
return (error);
}