From 59a01b70afb35b13dfc51b5f18c8336ec25229d9 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 19 Mar 2013 15:08:15 +0000 Subject: [PATCH] UFS support of the unmapped i/o for the user data buffers. Sponsored by: The FreeBSD Foundation Tested by: pho, scottl, jhb, bf --- sys/ufs/ffs/ffs_alloc.c | 10 ++++---- sys/ufs/ffs/ffs_balloc.c | 50 ++++++++++++++++++++++++---------------- sys/ufs/ffs/ffs_vfsops.c | 3 ++- sys/ufs/ffs/ffs_vnops.c | 33 ++++++++++++++++++-------- sys/ufs/ufs/ufs_extern.h | 1 + 5 files changed, 62 insertions(+), 35 deletions(-) diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 789a7cf53e38..ab21b8938100 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -254,7 +254,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) struct buf *bp; struct ufsmount *ump; u_int cg, request, reclaimed; - int error; + int error, gbflags; ufs2_daddr_t bno; static struct timeval lastfail; static int curfail; @@ -265,6 +265,8 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) fs = ip->i_fs; bp = NULL; ump = ip->i_ump; + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; + mtx_assert(UFS_MTX(ump), MA_OWNED); #ifdef INVARIANTS if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) @@ -296,7 +298,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) /* * Allocate the extra space in the buffer. */ - error = bread(vp, lbprev, osize, NOCRED, &bp); + error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); @@ -332,7 +334,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; - bzero(bp->b_data + osize, nsize - osize); + vfs_bio_bzero_buf(bp, osize, nsize - osize); if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) vfs_bio_set_valid(bp, osize, nsize - osize); *bpp = bp; @@ -400,7 +402,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; - bzero(bp->b_data + osize, nsize - osize); + vfs_bio_bzero_buf(bp, osize, nsize - osize); if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) vfs_bio_set_valid(bp, osize, nsize - osize); *bpp = bp; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index a5e99d92920f..d20df77a73ad 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -107,7 +107,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, int saved_inbdflush; static struct timeval lastfail; static int curfail; - int reclaimed; + int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din1; @@ -123,6 +123,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, return (EOPNOTSUPP); if (lbn < 0) return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); @@ -211,7 +212,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); @@ -255,7 +256,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { @@ -389,7 +390,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); @@ -418,16 +419,17 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, 0, &nbp); + MAXBSIZE, seqcount, gbflags, &nbp); } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, + gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); } curthread_pflags_restore(saved_inbdflush); @@ -539,7 +541,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, int saved_inbdflush; static struct timeval lastfail; static int curfail; - int reclaimed; + int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din2; @@ -553,6 +555,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, *bpp = NULL; if (lbn < 0) return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); @@ -603,7 +606,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); + error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -620,7 +624,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, -1 - lbn, osize, NOCRED, &bp); + error = bread_gb(vp, -1 - lbn, osize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -653,7 +658,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0); + bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); bp->b_xflags |= BX_ALTDATA; if (flags & BA_CLRBUF) @@ -707,7 +712,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); + error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -723,7 +729,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, lbn, osize, NOCRED, &bp); + error = bread_gb(vp, lbn, osize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -753,7 +760,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); @@ -797,7 +804,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { @@ -862,7 +870,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { @@ -931,7 +940,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); @@ -966,16 +975,17 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, 0, &nbp); + MAXBSIZE, seqcount, gbflags, &nbp); } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + error = bread_gb(vp, lbn, (int)fs->fs_bsize, + NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); } curthread_pflags_restore(saved_inbdflush); diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b3292d0e5360..8710862828ff 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1076,7 +1076,7 @@ ffs_mountfs(devvp, mp, td) */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | - MNTK_NO_IOPF; + MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART @@ -2110,6 +2110,7 @@ ffs_bufwrite(struct buf *bp) * set b_lblkno and BKGRDMARKER before calling bgetvp() * to avoid confusing the splay tree and gbincore(). */ + KASSERT((bp->b_flags & B_UNMAPPED) == 0, ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); newbp->b_lblkno = bp->b_lblkno; newbp->b_xflags |= BX_BKGRDMARKER; diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index c310ece129f7..09e5a42388e5 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -509,7 +509,8 @@ ffs_read(ap) /* * Don't do readahead if this is the end of the file. */ - error = bread(vp, lbn, size, NOCRED, &bp); + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* * Otherwise if we are allowed to cluster, @@ -520,7 +521,7 @@ ffs_read(ap) */ error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, blkoffset + uio->uio_resid, - seqcount, 0, &bp); + seqcount, GB_UNMAPPED, &bp); } else if (seqcount > 1) { /* * If we are NOT allowed to cluster, then @@ -531,15 +532,16 @@ ffs_read(ap) * the 6th argument. */ int nextsize = blksize(fs, ip, nextlbn); - error = breadn(vp, lbn, - size, &nextlbn, &nextsize, 1, NOCRED, &bp); + error = breadn_flags(vp, lbn, size, &nextlbn, + &nextsize, 1, NOCRED, GB_UNMAPPED, &bp); } else { /* * Failing all of the above, just read what the * user asked for. Interestingly, the same as * the first option above. */ - error = bread(vp, lbn, size, NOCRED, &bp); + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); } if (error) { brelse(bp); @@ -570,8 +572,13 @@ ffs_read(ap) xfersize = size; } - error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); + if ((bp->b_flags & B_UNMAPPED) == 0) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } if (error) break; @@ -702,6 +709,7 @@ ffs_write(ap) flags = seqcount << BA_SEQSHIFT; if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) flags |= IO_SYNC; + flags |= BA_UNMAPPED; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); @@ -741,8 +749,13 @@ ffs_write(ap) if (size < xfersize) xfersize = size; - error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); + if ((bp->b_flags & B_UNMAPPED) == 0) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } /* * If the buffer is not already filled and we encounter an * error while trying to fill it, we have to clear out any @@ -786,7 +799,7 @@ ffs_write(ap) if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(vp, bp, ip->i_size, seqcount, - 0); + GB_UNMAPPED); } else { bawrite(bp); } diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index c590748f7ff0..31a2ba898894 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -121,6 +121,7 @@ void softdep_revert_rmdir(struct inode *, struct inode *); */ #define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ #define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ +#define BA_UNMAPPED 0x00040000 /* Do not mmap resulted buffer. */ #define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ #define BA_SEQSHIFT 24 #define BA_SEQMAX 0x7F