Bring in reallocblk to ext2fs.

The feature has been standard for a while in UFS as a means to reduce
fragmentation, therefore maintaining consistent performance with
filesystem aging. This is also very similar to what ext4 calls
"delayed allocation".

In his 2010 GSoC, Zheng Liu ported and benchmarked the missing
FANCY_REALLOC code to find more consistent performance improvements than
with the preallocation approach.

PR:		159233
Author:		Zheng Liu <gnehzuil AT SPAMFREE gmail DOT com>
Sponsored by:	Google Inc.
Approved by:	jhb (mentor)
MFC after:	2 weeks
This commit is contained in:
Pedro F. Giffuni 2011-12-15 20:31:18 +00:00
parent 51907e6434
commit 5b63c1252b
5 changed files with 335 additions and 38 deletions

View File

@ -42,6 +42,7 @@
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/buf.h>
@ -52,6 +53,7 @@
#include <fs/ext2fs/ext2_extern.h>
static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int);
static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int);
static u_long ext2_dirpref(struct inode *);
static void ext2_fserr(struct m_ext2fs *, uid_t, char *);
static u_long ext2_hashalloc(struct inode *, int, long, int,
@ -59,9 +61,6 @@ static u_long ext2_hashalloc(struct inode *, int, long, int,
int));
static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
#ifdef FANCY_REALLOC
static int ext2_reallocblks(struct vop_reallocblks_args *);
#endif
/*
* Allocate a block in the file system.
@ -150,7 +149,6 @@ nospace:
* the previous block allocation will be used.
*/
#ifdef FANCY_REALLOC
static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
static int doasyncfree = 1;
@ -159,7 +157,6 @@ SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
static int doreallocblks = 1;
SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
#endif
int
ext2_reallocblks(ap)
@ -168,11 +165,6 @@ ext2_reallocblks(ap)
struct cluster_save *a_buflist;
} */ *ap;
{
#ifndef FANCY_REALLOC
/* printf("ext2_reallocblks not implemented\n"); */
return ENOSPC;
#else
struct m_ext2fs *fs;
struct inode *ip;
struct vnode *vp;
@ -184,14 +176,17 @@ return ENOSPC;
int32_t start_lbn, end_lbn, soff, newblk, blkno;
int i, len, start_lvl, end_lvl, pref, ssize;
if (doreallocblks == 0)
return (ENOSPC);
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_e2fs;
ump = ip->i_ump;
#ifdef UNKLAR
if (fs->fs_contigsumsize <= 0)
if (fs->e2fs_contigsumsize <= 0)
return (ENOSPC);
#endif
buflist = ap->a_buflist;
len = buflist->bs_nchildren;
start_lbn = buflist->bs_children[0]->b_lblkno;
@ -227,11 +222,6 @@ return ENOSPC;
sbap = (int32_t *)sbp->b_data;
soff = idp->in_off;
}
/*
* Find the preferred location for the cluster.
*/
EXT2_LOCK(ump);
pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
/*
* If the block range spans two block maps, get the second map.
*/
@ -243,12 +233,15 @@ return ENOSPC;
panic("ext2_reallocblk: start == end");
#endif
ssize = len - (idp->in_off + 1);
if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
EXT2_UNLOCK(ump);
if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
goto fail;
}
ebap = (int32_t *)ebp->b_data;
}
/*
* Find the preferred location for the cluster.
*/
EXT2_LOCK(ump);
pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
/*
* Search the block map looking for an allocation of the desired size.
*/
@ -264,15 +257,23 @@ return ENOSPC;
* block pointers in the inode and indirect blocks associated
* with the file.
*/
#ifdef DEBUG
printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
(intmax_t)start_lbn, (intmax_t)end_lbn);
#endif /* DEBUG */
blkno = newblk;
for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
if (i == ssize)
if (i == ssize) {
bap = ebap;
soff = -i;
}
#ifdef DIAGNOSTIC
if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
panic("ext2_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
printf(" %d,", *bap);
#endif /* DEBUG */
*bap++ = blkno;
}
/*
@ -308,11 +309,20 @@ return ENOSPC;
/*
* Last, free the old blocks and assign the new blocks to the buffers.
*/
#ifdef DEBUG
printf("\n\tnew:");
#endif /* DEBUG */
for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
fs->e2fs_bsize);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DEBUG
printf(" %d,", blkno);
#endif /* DEBUG */
}
#ifdef DEBUG
printf("\n");
#endif /* DEBUG */
return (0);
fail:
@ -321,8 +331,6 @@ fail:
if (sbap != &ip->i_db[0])
brelse(sbp);
return (ENOSPC);
#endif /* FANCY_REALLOC */
}
/*
@ -747,6 +755,7 @@ gotit:
#endif
setbit(bbp, bno);
EXT2_LOCK(ump);
ext2_clusteracct(fs, bbp, cg, bno, -1);
fs->e2fs->e2fs_fbcount--;
fs->e2fs_gd[cg].ext2bgd_nbfree--;
fs->e2fs_fmod = 1;
@ -755,6 +764,116 @@ gotit:
return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
}
/*
* Determine whether a cluster can be allocated.
*/
static daddr_t
ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
{
struct m_ext2fs *fs;
struct ext2mount *ump;
struct buf *bp;
char *bbp;
int bit, error, got, i, loc, run;
int32_t *lp;
daddr_t bno;
fs = ip->i_e2fs;
ump = ip->i_ump;
if (fs->e2fs_maxcluster[cg] < len)
return (0);
EXT2_UNLOCK(ump);
error = bread(ip->i_devvp,
fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
(int)fs->e2fs_bsize, NOCRED, &bp);
if (error)
goto fail_lock;
bbp = (char *)bp->b_data;
bp->b_xflags |= BX_BKGRDWRITE;
EXT2_LOCK(ump);
/*
* Check to see if a cluster of the needed size (or bigger) is
* available in this cylinder group.
*/
lp = &fs->e2fs_clustersum[cg].cs_sum[len];
for (i = len; i <= fs->e2fs_contigsumsize; i++)
if (*lp++ > 0)
break;
if (i > fs->e2fs_contigsumsize) {
/*
* Update the cluster summary information to reflect
* the true maximum-sized cluster so that future cluster
* allocation requests can avoid reading the bitmap only
* to find no cluster.
*/
lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
for (i = len - 1; i > 0; i--)
if (*lp-- > 0)
break;
fs->e2fs_maxcluster[cg] = i;
goto fail;
}
EXT2_UNLOCK(ump);
/* Search the bitmap to find a big enough cluster like in FFS. */
if (dtog(fs, bpref) != cg)
bpref = 0;
if (bpref != 0)
bpref = dtogd(fs, bpref);
loc = bpref / NBBY;
bit = 1 << (bpref % NBBY);
for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
if ((bbp[loc] & bit) != 0)
run = 0;
else {
run++;
if (run == len)
break;
}
if ((got & (NBBY - 1)) != (NBBY - 1))
bit <<= 1;
else {
loc++;
bit = 1;
}
}
if (got >= fs->e2fs->e2fs_fpg)
goto fail_lock;
/* Allocate the cluster that we found. */
for (i = 1; i < len; i++)
if (!isclr(bbp, got - run + i))
panic("ext2_clusteralloc: map mismatch");
bno = got - run + 1;
if (bno >= fs->e2fs->e2fs_fpg)
panic("ext2_clusteralloc: allocated out of group");
EXT2_LOCK(ump);
for (i = 0; i < len; i += fs->e2fs_fpb) {
setbit(bbp, bno + i);
ext2_clusteracct(fs, bbp, cg, bno + i, -1);
fs->e2fs->e2fs_fbcount--;
fs->e2fs_gd[cg].ext2bgd_nbfree--;
}
fs->e2fs_fmod = 1;
EXT2_UNLOCK(ump);
bdwrite(bp);
return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
fail_lock:
EXT2_LOCK(ump);
fail:
brelse(bp);
return (0);
}
/*
* Determine whether an inode can be allocated.
*
@ -877,6 +996,7 @@ ext2_blkfree(ip, bno, size)
}
clrbit(bbp, bno);
EXT2_LOCK(ump);
ext2_clusteracct(fs, bbp, cg, bno, 1);
fs->e2fs->e2fs_fbcount++;
fs->e2fs_gd[cg].ext2bgd_nbfree++;
fs->e2fs_fmod = 1;

View File

@ -55,12 +55,13 @@ void ext2_blkfree(struct inode *, int32_t, long);
int32_t ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t);
int ext2_bmap(struct vop_bmap_args *);
int ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *);
void ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
void ext2_dirbad(struct inode *ip, doff_t offset, char *how);
void ext2_ei2i(struct ext2fs_dinode *, struct inode *);
int ext2_getlbns(struct vnode *, int32_t, struct indir *, int *);
void ext2_i2ei(struct inode *, struct ext2fs_dinode *);
void ext2_itimes(struct vnode *vp);
int ext2_reallocblks(struct vop_reallocblks_args *);
void ext2_itimes(struct vnode *vp);
int ext2_reclaim(struct vop_reclaim_args *);
void ext2_setblock(struct m_ext2fs *, u_char *, int32_t);
int ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);

View File

@ -120,3 +120,107 @@ ext2_checkoverlap(bp, ip)
}
}
#endif /* KDB */
/*
* Update the cluster map because of an allocation of free like ffs.
*
* Cnt == 1 means free; cnt == -1 means allocating.
*/
void
ext2_clusteracct(struct m_ext2fs *fs, char *bbp, int cg, daddr_t bno, int cnt)
{
int32_t *sump = fs->e2fs_clustersum[cg].cs_sum;
int32_t *lp;
int back, bit, end, forw, i, loc, start;
/* Initialize the cluster summary array. */
if (fs->e2fs_clustersum[cg].cs_init == 0) {
int run = 0;
bit = 1;
loc = 0;
for (i = 0; i < fs->e2fs->e2fs_fpg; i++) {
if ((bbp[loc] & bit) == 0)
run++;
else if (run != 0) {
if (run > fs->e2fs_contigsumsize)
run = fs->e2fs_contigsumsize;
sump[run]++;
run = 0;
}
if ((i & (NBBY - 1)) != (NBBY - 1))
bit <<= 1;
else {
loc++;
bit = 1;
}
}
if (run != 0) {
if (run > fs->e2fs_contigsumsize)
run = fs->e2fs_contigsumsize;
sump[run]++;
}
fs->e2fs_clustersum[cg].cs_init = 1;
}
if (fs->e2fs_contigsumsize <= 0)
return;
/* Find the size of the cluster going forward. */
start = bno + 1;
end = start + fs->e2fs_contigsumsize;
if (end > fs->e2fs->e2fs_fpg)
end = fs->e2fs->e2fs_fpg;
loc = start / NBBY;
bit = 1 << (start % NBBY);
for (i = start; i < end; i++) {
if ((bbp[loc] & bit) != 0)
break;
if ((i & (NBBY - 1)) != (NBBY - 1))
bit <<= 1;
else {
loc++;
bit = 1;
}
}
forw = i - start;
/* Find the size of the cluster going backward. */
start = bno - 1;
end = start - fs->e2fs_contigsumsize;
if (end < 0)
end = -1;
loc = start / NBBY;
bit = 1 << (start % NBBY);
for (i = start; i > end; i--) {
if ((bbp[loc] & bit) != 0)
break;
if ((i & (NBBY - 1)) != 0)
bit >>= 1;
else {
loc--;
bit = 1 << (NBBY - 1);
}
}
back = start - i;
/*
* Account for old cluster and the possibly new forward and
* back clusters.
*/
i = back + forw + 1;
if (i > fs->e2fs_contigsumsize)
i = fs->e2fs_contigsumsize;
sump[i] += cnt;
if (back > 0)
sump[back] -= cnt;
if (forw > 0)
sump[forw] -= cnt;
/* Update cluster summary information. */
lp = &sump[fs->e2fs_contigsumsize];
for (i = fs->e2fs_contigsumsize; i > 0; i--)
if (*lp-- > 0)
break;
fs->e2fs_maxcluster[cg] = i;
}

View File

@ -405,7 +405,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
* Things to do to update the mount:
* 1) invalidate all cached meta-data.
* 2) re-read superblock from disk.
* 3) re-read summary information from disk.
* 3) invalidate all cluster summary information.
* 4) invalidate all inactive vnodes.
* 5) invalidate all cached file data.
* 6) re-read inode data for all active vnodes.
@ -419,7 +419,9 @@ ext2_reload(struct mount *mp, struct thread *td)
struct buf *bp;
struct ext2fs *es;
struct m_ext2fs *fs;
int error;
struct csum *sump;
int error, i;
int32_t *lp;
if ((mp->mnt_flag & MNT_RDONLY) == 0)
return (EINVAL);
@ -456,6 +458,19 @@ ext2_reload(struct mount *mp, struct thread *td)
#endif
brelse(bp);
/*
* Step 3: invalidate all cluster summary information.
*/
if (fs->e2fs_contigsumsize > 0) {
lp = fs->e2fs_maxcluster;
sump = fs->e2fs_clustersum;
for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
*lp++ = fs->e2fs_contigsumsize;
sump->cs_init = 0;
bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
}
}
loop:
MNT_ILOCK(mp);
MNT_VNODE_FOREACH(vp, mp, mvp) {
@ -511,8 +526,11 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp)
struct cdev *dev = devvp->v_rdev;
struct g_consumer *cp;
struct bufobj *bo;
struct csum *sump;
int error;
int ronly;
int i, size;
int32_t *lp;
ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
/* XXX: use VOP_ACESS to check FS perms */
@ -582,6 +600,33 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp)
if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
goto out;
/*
* Calculate the maximum contiguous blocks and size of cluster summary
* array. In FFS this is done by newfs; however the superblock in
* ext2fs doesn't have these variables so we just can calculate
* them here.
*/
ump->um_e2fs->e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
if (ump->um_e2fs->e2fs_maxcontig > 0)
ump->um_e2fs->e2fs_contigsumsize =
MIN(ump->um_e2fs->e2fs_maxcontig, EXT2_MAXCONTIG);
else
ump->um_e2fs->e2fs_contigsumsize = 0;
if (ump->um_e2fs->e2fs_contigsumsize > 0) {
size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
lp = ump->um_e2fs->e2fs_maxcluster;
sump = ump->um_e2fs->e2fs_clustersum;
for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
*lp++ = ump->um_e2fs->e2fs_contigsumsize;
sump->cs_init = 0;
sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
}
}
brelse(bp);
bp = NULL;
fs = ump->um_e2fs;
@ -656,7 +701,8 @@ ext2_unmount(struct mount *mp, int mntflags)
{
struct ext2mount *ump;
struct m_ext2fs *fs;
int error, flags, ronly;
struct csum *sump;
int error, flags, i, ronly;
flags = 0;
if (mntflags & MNT_FORCE) {
@ -681,6 +727,11 @@ ext2_unmount(struct mount *mp, int mntflags)
g_topology_unlock();
PICKUP_GIANT();
vrele(ump->um_devvp);
sump = fs->e2fs_clustersum;
for (i = 0; i < fs->e2fs_gcount; i++, sump++)
free(sump->cs_sum, M_EXT2MNT);
free(fs->e2fs_clustersum, M_EXT2MNT);
free(fs->e2fs_maxcluster, M_EXT2MNT);
free(fs->e2fs_gd, M_EXT2MNT);
free(fs->e2fs_contigdirs, M_EXT2MNT);
free(fs->e2fs, M_EXT2MNT);

View File

@ -44,6 +44,16 @@
*/
#define EXT2_LINK_MAX 32000
/*
* A summary of contiguous blocks of various sizes is maintained
* in each cylinder group. Normally this is set by the initial
* value of fs_maxcontig.
*
* XXX:FS_MAXCONTIG is set to 16 to conserve space. Here we set
* EXT2_MAXCONTIG to 32 for better performance.
*/
#define EXT2_MAXCONTIG 32
/*
* Constants relative to the data blocks
*/
@ -151,6 +161,10 @@ struct m_ext2fs {
char e2fs_wasvalid; /* valid at mount time */
off_t e2fs_maxfilesize;
struct ext2_gd *e2fs_gd; /* Group Descriptors */
int32_t e2fs_maxcontig; /* max number of contiguous blks */
int32_t e2fs_contigsumsize; /* size of cluster summary array */
int32_t *e2fs_maxcluster; /* max cluster in each cyl group */
struct csum *e2fs_clustersum; /* cluster summary in each cyl group */
};
/*
@ -253,6 +267,13 @@ struct ext2_gd {
uint32_t reserved2[3];
};
/* cluster summary information */
struct csum {
int8_t cs_init; /* cluster summary has been initialized */
int32_t *cs_sum; /* cluster summary array */
};
/* EXT2FS metadatas are stored in little-endian byte order. These macros
* helps reading these metadatas
*/