Put in place the framework for consolidating contiguous blocks into

a smaller number of larger TRIM requests. The hope had been to have
the full TRIM consolidation in place for 12.0, but the algorithms
are still under development and need further testing. With this
framework in place it will be possible to easily add TRIM consolidation
once the optimal strategy has been found.

The only functional change with this patch is the elimination of TRIM
requests for blocks that are freed before they are likely to have
been written.

Reviewed by: kib
Discussed with: Warner Losh and Chuck Silvers
Sponsored by: Netflix
This commit is contained in:
mckusick 2018-08-06 21:09:11 +00:00
parent b7d0a24dba
commit e7f491b70c
9 changed files with 242 additions and 67 deletions

View File

@ -110,8 +110,6 @@ static ufs2_daddr_t
static void ffs_blkfree_cg(struct ufsmount *, struct fs *,
struct vnode *, ufs2_daddr_t, long, ino_t,
struct workhead *);
static void ffs_blkfree_trim_completed(struct buf *);
static void ffs_blkfree_trim_task(void *ctx, int pending __unused);
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
@ -395,8 +393,23 @@ retry:
if (bno > 0) {
bp->b_blkno = fsbtodb(fs, bno);
if (!DOINGSOFTDEP(vp))
/*
* The usual case is that a smaller fragment that
* was just allocated has been replaced with a bigger
* fragment or a full-size block. If it is marked as
* B_DELWRI, the current contents have not been written
* to disk. It is possible that the block was written
* earlier, but very uncommon. If the block has never
* been written, there is no need to send a BIO_DELETE
* for it when it is freed. The gain from avoiding the
* TRIMs for the common case of unwritten blocks far
* exceeds the cost of the write amplification for the
* uncommon case of failing to send a TRIM for a block
* that had been written.
*/
ffs_blkfree(ump, fs, ump->um_devvp, bprev, (long)osize,
ip->i_number, vp->v_type, NULL);
ip->i_number, vp->v_type, NULL,
(bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON);
delta = btodb(nsize - osize);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
if (flags & IO_EXT)
@ -521,7 +534,7 @@ ffs_reallocblks_ufs1(ap)
struct fs *fs;
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
struct buf *sbp, *ebp, *bp;
ufs1_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
@ -730,14 +743,29 @@ ffs_reallocblks_ufs1(ap)
printf("\n\tnew:");
#endif
for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
bp = buflist->bs_children[i];
if (!DOINGSOFTDEP(vp))
/*
* The usual case is that a set of N-contiguous blocks
* that was just allocated has been replaced with a
* set of N+1-contiguous blocks. If they are marked as
* B_DELWRI, the current contents have not been written
* to disk. It is possible that the blocks were written
* earlier, but very uncommon. If the blocks have never
* been written, there is no need to send a BIO_DELETE
* for them when they are freed. The gain from avoiding
* the TRIMs for the common case of unwritten blocks
* far exceeds the cost of the write amplification for
* the uncommon case of failing to send a TRIM for the
* blocks that had been written.
*/
ffs_blkfree(ump, fs, ump->um_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
fs->fs_bsize, ip->i_number, vp->v_type, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
dbtofsb(fs, bp->b_blkno),
fs->fs_bsize, ip->i_number, vp->v_type, NULL,
(bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON);
bp->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize))
panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
@ -771,7 +799,7 @@ ffs_reallocblks_ufs2(ap)
struct fs *fs;
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
struct buf *sbp, *ebp, *bp;
ufs2_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
@ -978,14 +1006,29 @@ ffs_reallocblks_ufs2(ap)
printf("\n\tnew:");
#endif
for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
bp = buflist->bs_children[i];
if (!DOINGSOFTDEP(vp))
/*
* The usual case is that a set of N-contiguous blocks
* that was just allocated has been replaced with a
* set of N+1-contiguous blocks. If they are marked as
* B_DELWRI, the current contents have not been written
* to disk. It is possible that the blocks were written
* earlier, but very uncommon. If the blocks have never
* been written, there is no need to send a BIO_DELETE
* for them when they are freed. The gain from avoiding
* the TRIMs for the common case of unwritten blocks
* far exceeds the cost of the write amplification for
* the uncommon case of failing to send a TRIM for the
* blocks that had been written.
*/
ffs_blkfree(ump, fs, ump->um_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
fs->fs_bsize, ip->i_number, vp->v_type, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
dbtofsb(fs, bp->b_blkno),
fs->fs_bsize, ip->i_number, vp->v_type, NULL,
(bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON);
bp->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize))
panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
@ -1823,8 +1866,7 @@ gotit:
/* XXX Fixme. */
UFS_UNLOCK(ump);
if (DOINGSOFTDEP(ITOV(ip)))
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno,
size, 0);
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, size, 0);
UFS_LOCK(ump);
return (blkno);
}
@ -2254,6 +2296,17 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
bdwrite(bp);
}
/*
* Structures and routines associated with trim management.
*/
MALLOC_DEFINE(M_TRIM, "ufs_trim", "UFS trim structures");
#define TRIMLIST_HASH(ump, inum) \
(&(ump)->um_trimhash[(inum) & (ump)->um_trimlisthashsize])
static void ffs_blkfree_trim_completed(struct buf *);
static void ffs_blkfree_trim_task(void *ctx, int pending __unused);
struct ffs_blkfree_trim_params {
struct task task;
struct ufsmount *ump;
@ -2277,7 +2330,7 @@ ffs_blkfree_trim_task(ctx, pending)
tp->inum, tp->pdephd);
vn_finished_secondary_write(UFSTOVFS(tp->ump));
atomic_add_int(&tp->ump->um_trim_inflight, -1);
free(tp, M_TEMP);
free(tp, M_TRIM);
}
static void
@ -2287,13 +2340,13 @@ ffs_blkfree_trim_completed(bp)
struct ffs_blkfree_trim_params *tp;
tp = bp->b_fsprivate1;
free(bp, M_TEMP);
free(bp, M_TRIM);
TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp);
taskqueue_enqueue(tp->ump->um_trim_tq, &tp->task);
}
void
ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd, trimtype)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
@ -2302,6 +2355,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
ino_t inum;
enum vtype vtype;
struct workhead *dephd;
int trimtype;
{
struct mount *mp;
struct buf *bp;
@ -2319,10 +2373,11 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
return;
}
/*
* Nothing to delay if TRIM is disabled, or the operation is
* performed on the snapshot.
* Nothing to delay if TRIM is not required for this block or TRIM
* is disabled or the operation is performed on a snapshot.
*/
if (((ump->um_flags) & UM_CANDELETE) == 0 || devvp->v_type == VREG) {
if (trimtype == NOTRIM || ((ump->um_flags & UM_CANDELETE) == 0) ||
devvp->v_type == VREG) {
ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd);
return;
}
@ -2334,7 +2389,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
* and write some new data into it.
*/
atomic_add_int(&ump->um_trim_inflight, 1);
tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK);
tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TRIM, M_WAITOK);
tp->ump = ump;
tp->devvp = devvp;
tp->bno = bno;
@ -2347,7 +2402,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
} else
tp->pdephd = NULL;
bp = malloc(sizeof(*bp), M_TEMP, M_WAITOK | M_ZERO);
bp = malloc(sizeof(*bp), M_TRIM, M_WAITOK | M_ZERO);
bp->b_iocmd = BIO_DELETE;
bp->b_iooffset = dbtob(fsbtodb(fs, bno));
bp->b_iodone = ffs_blkfree_trim_completed;
@ -2824,7 +2879,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
long blkcnt, blksize;
struct file *fp, *vfp;
cap_rights_t rights;
int filetype, error;
int filetype, trimtype, error;
static struct fileops *origops, bufferedops;
if (req->newlen > sizeof cmd)
@ -2956,14 +3011,17 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
blkno = cmd.value;
blkcnt = cmd.size;
blksize = fs->fs_frag - (blkno % fs->fs_frag);
trimtype = (blksize < blkcnt) ? STARTFREE : SINGLETON;
while (blkcnt > 0) {
if (blksize > blkcnt)
blksize = blkcnt;
ffs_blkfree(ump, fs, ump->um_devvp, blkno,
blksize * fs->fs_fsize, UFS_ROOTINO, VDIR, NULL);
blksize * fs->fs_fsize, UFS_ROOTINO,
VDIR, NULL, trimtype);
blkno += blksize;
blkcnt -= blksize;
blksize = fs->fs_frag;
trimtype = (blksize < blkcnt) ? CONTINUEFREE : ENDFREE;
}
break;

View File

@ -553,7 +553,7 @@ fail:
lbns_remfree++;
#endif
ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
ip->i_number, vp->v_type, NULL, SINGLETON);
}
return (error);
}
@ -1147,7 +1147,7 @@ fail:
lbns_remfree++;
#endif
ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
ip->i_number, vp->v_type, NULL, SINGLETON);
}
return (error);
}

View File

@ -63,7 +63,7 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size,
struct ucred *a_cred, int a_flags, struct buf **a_bpp);
int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **);
void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *,
ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *);
ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *, int);
ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
int ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
@ -111,11 +111,28 @@ vfs_vget_t ffs_vget;
int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int);
void process_deferred_inactive(struct mount *mp);
/*
* Flags to ffs_vgetf
*/
#define FFSV_FORCEINSMQ 0x0001
/*
* Flags to ffs_reload
*/
#define FFSR_FORCE 0x0001
#define FFSR_UNSUSPEND 0x0002
/*
* Trim type to ffs_blkfree - used to help with BIO_DELETE (trim) requests
*/
#define NOTRIM 1 /* never written, so don't call trim for it */
#define SINGLETON 2 /* only block being freed, so trim it now */
#define STARTFREE 3 /* beginning to free for this inum */
#define CONTINUEFREE 4 /* additional block free for this inum */
#define ENDFREE 5 /* last block to free for this inum */
#define MAXTRIMIO 1024 /* maximum expected outstanding trim requests */
extern struct vop_vector ffs_vnodeops1;
extern struct vop_vector ffs_fifoops1;
extern struct vop_vector ffs_vnodeops2;

View File

@ -195,7 +195,7 @@ ffs_truncate(vp, length, flags, cred)
struct ufsmount *ump;
int softdeptrunc, journaltrunc;
int needextclean, extblocks;
int offset, size, level, nblocks;
int trimtype, firstfree, offset, size, level, nblocks;
int i, error, allerror, indiroff, waitforupdate;
off_t osize;
@ -275,7 +275,7 @@ ffs_truncate(vp, length, flags, cred)
continue;
ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
sblksize(fs, osize, i), ip->i_number,
vp->v_type, NULL);
vp->v_type, NULL, SINGLETON);
}
}
}
@ -523,7 +523,7 @@ ffs_truncate(vp, length, flags, cred)
DIP_SET(ip, i_ib[level], 0);
ffs_blkfree(ump, fs, ump->um_devvp, bn,
fs->fs_bsize, ip->i_number,
vp->v_type, NULL);
vp->v_type, NULL, SINGLETON);
blocksreleased += nblocks;
}
}
@ -534,6 +534,7 @@ ffs_truncate(vp, length, flags, cred)
/*
* All whole direct blocks or frags.
*/
firstfree = 1;
for (i = UFS_NDADDR - 1; i > lastblock; i--) {
long bsize;
@ -542,8 +543,23 @@ ffs_truncate(vp, length, flags, cred)
continue;
DIP_SET(ip, i_db[i], 0);
bsize = blksize(fs, ip, i);
if (firstfree) {
if (i - 1 == lastblock || DIP(ip, i_db[i - 1]) == 0) {
trimtype = SINGLETON;
} else {
trimtype = STARTFREE;
firstfree = 0;
}
} else {
if (i - 1 == lastblock || DIP(ip, i_db[i - 1]) == 0) {
trimtype = ENDFREE;
firstfree = 1;
} else {
trimtype = CONTINUEFREE;
}
}
ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
vp->v_type, NULL);
vp->v_type, NULL, trimtype);
blocksreleased += btodb(bsize);
}
if (lastblock < 0)
@ -575,7 +591,8 @@ ffs_truncate(vp, length, flags, cred)
*/
bn += numfrags(fs, newspace);
ffs_blkfree(ump, fs, ump->um_devvp, bn,
oldspace - newspace, ip->i_number, vp->v_type, NULL);
oldspace - newspace, ip->i_number, vp->v_type,
NULL, SINGLETON);
blocksreleased += btodb(oldspace - newspace);
}
}
@ -636,7 +653,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
struct fs *fs;
struct vnode *vp;
caddr_t copy = NULL;
int i, nblocks, error = 0, allerror = 0;
int i, trimtype, nblocks, firstfree, error = 0, allerror = 0;
ufs2_daddr_t nb, nlbn, last;
ufs2_daddr_t blkcount, factor, blocksreleased = 0;
ufs1_daddr_t *bap1 = NULL;
@ -719,6 +736,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
/*
* Recursively free totally unused blocks.
*/
firstfree = 1;
for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
i--, nlbn += factor) {
nb = BAP(ip, i);
@ -730,8 +748,23 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
allerror = error;
blocksreleased += blkcount;
}
if (firstfree) {
if (i - 1 == last || BAP(ip, i - 1) == 0) {
trimtype = SINGLETON;
} else {
trimtype = STARTFREE;
firstfree = 0;
}
} else {
if (i - 1 == last || BAP(ip, i - 1) == 0) {
trimtype = ENDFREE;
firstfree = 1;
} else {
trimtype = CONTINUEFREE;
}
}
ffs_blkfree(ITOUMP(ip), fs, ITODEVVP(ip), nb, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
ip->i_number, vp->v_type, NULL, trimtype);
blocksreleased += nblocks;
}

View File

@ -583,7 +583,7 @@ loop:
if (len != 0 && len < fs->fs_bsize) {
ffs_blkfree(ump, copy_fs, vp,
DIP(xp, i_db[loc]), len, xp->i_number,
xvp->v_type, NULL);
xvp->v_type, NULL, SINGLETON);
blkno = DIP(xp, i_db[loc]);
DIP_SET(xp, i_db[loc], 0);
}
@ -1265,7 +1265,7 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
vp->v_type, NULL);
vp->v_type, NULL, SINGLETON);
}
return (0);
}
@ -1549,7 +1549,7 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
vp->v_type, NULL);
vp->v_type, NULL, SINGLETON);
}
return (0);
}

View File

@ -869,7 +869,7 @@ static void cancel_allocdirect(struct allocdirectlst *,
struct allocdirect *, struct freeblks *);
static int check_inode_unwritten(struct inodedep *);
static int free_inodedep(struct inodedep *);
static void freework_freeblock(struct freework *);
static void freework_freeblock(struct freework *, int);
static void freework_enqueue(struct freework *);
static int handle_workitem_freeblocks(struct freeblks *, int);
static int handle_complete_freeblocks(struct freeblks *, int);
@ -884,7 +884,7 @@ static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
ufs2_daddr_t, ufs_lbn_t);
static void handle_workitem_freefrag(struct freefrag *);
static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long,
ufs_lbn_t);
ufs_lbn_t, int);
static void allocdirect_merge(struct allocdirectlst *,
struct allocdirect *, struct allocdirect *);
static struct freefrag *allocindir_merge(struct allocindir *,
@ -5289,7 +5289,22 @@ softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp)
KASSERT(MOUNTEDSOFTDEP(mp) != 0,
("softdep_setup_allocdirect called on non-softdep filesystem"));
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
/*
* The usual case is that a smaller fragment that
* was just allocated has been replaced with a bigger
* fragment or a full-size block. If it is marked as
* B_DELWRI, the current contents have not been written
* to disk. It is possible that the block was written
* earlier, but very uncommon. If the block has never
* been written, there is no need to send a BIO_DELETE
* for it when it is freed. The gain from avoiding the
* TRIMs for the common case of unwritten blocks far
* exceeds the cost of the write amplification for the
* uncommon case of failing to send a TRIM for a block
* that had been written.
*/
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn,
(bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON);
else
freefrag = NULL;
@ -5566,11 +5581,12 @@ newjfreefrag(freefrag, ip, blkno, size, lbn)
* Allocate a new freefrag structure.
*/
static struct freefrag *
newfreefrag(ip, blkno, size, lbn)
newfreefrag(ip, blkno, size, lbn, trimtype)
struct inode *ip;
ufs2_daddr_t blkno;
long size;
ufs_lbn_t lbn;
int trimtype;
{
struct freefrag *freefrag;
struct ufsmount *ump;
@ -5591,6 +5607,7 @@ newfreefrag(ip, blkno, size, lbn)
freefrag->ff_vtype = ITOV(ip)->v_type;
freefrag->ff_blkno = blkno;
freefrag->ff_fragsize = size;
freefrag->ff_trimtype = trimtype;
if (MOUNTEDSUJ(UFSTOVFS(ump))) {
freefrag->ff_jdep = (struct worklist *)
@ -5636,7 +5653,8 @@ handle_workitem_freefrag(freefrag)
}
FREE_LOCK(ump);
ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd);
freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd,
freefrag->ff_trimtype);
ACQUIRE_LOCK(ump);
WORKITEM_FREE(freefrag, D_FREEFRAG);
FREE_LOCK(ump);
@ -5676,7 +5694,22 @@ softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp)
lbn = bp->b_lblkno;
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
/*
* The usual case is that a smaller fragment that
* was just allocated has been replaced with a bigger
* fragment or a full-size block. If it is marked as
* B_DELWRI, the current contents have not been written
* to disk. It is possible that the block was written
* earlier, but very uncommon. If the block has never
* been written, there is no need to send a BIO_DELETE
* for it when it is freed. The gain from avoiding the
* TRIMs for the common case of unwritten blocks far
* exceeds the cost of the write amplification for the
* uncommon case of failing to send a TRIM for a block
* that had been written.
*/
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn,
(bp->b_flags & B_DELWRI) != 0 ? NOTRIM : SINGLETON);
else
freefrag = NULL;
@ -5789,7 +5822,8 @@ newallocindir(ip, ptrno, newblkno, oldblkno, lbn)
struct jnewblk *jnewblk;
if (oldblkno)
freefrag = newfreefrag(ip, oldblkno, ITOFS(ip)->fs_bsize, lbn);
freefrag = newfreefrag(ip, oldblkno, ITOFS(ip)->fs_bsize, lbn,
SINGLETON);
else
freefrag = NULL;
ACQUIRE_LOCK(ITOUMP(ip));
@ -7724,8 +7758,9 @@ free_inodedep(inodedep)
* in memory immediately.
*/
static void
freework_freeblock(freework)
freework_freeblock(freework, trimtype)
struct freework *freework;
int trimtype;
{
struct freeblks *freeblks;
struct jnewblk *jnewblk;
@ -7779,10 +7814,10 @@ freework_freeblock(freework)
FREE_LOCK(ump);
freeblks_free(ump, freeblks, btodb(bsize));
CTR4(KTR_SUJ,
"freework_freeblock: ino %d blkno %jd lbn %jd size %ld",
"freework_freeblock: ino %jd blkno %jd lbn %jd size %d",
freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize);
ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
freeblks->fb_inum, freeblks->fb_vtype, &wkhd, trimtype);
ACQUIRE_LOCK(ump);
/*
* The jnewblk will be discarded and the bits in the map never
@ -7835,7 +7870,7 @@ handle_workitem_indirblk(freework)
return;
}
if (freework->fw_off == NINDIR(fs)) {
freework_freeblock(freework);
freework_freeblock(freework, SINGLETON);
return;
}
freework->fw_state |= INPROGRESS;
@ -7889,16 +7924,19 @@ handle_workitem_freeblocks(freeblks, flags)
struct freeblks *freeblks;
int flags;
{
struct freework *freework;
struct freework *freework, *prevfreework;
struct newblk *newblk;
struct allocindir *aip;
struct ufsmount *ump;
struct worklist *wk;
int trimtype;
KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd),
("handle_workitem_freeblocks: Journal entries not written."));
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
ACQUIRE_LOCK(ump);
prevfreework = NULL;
trimtype = 0;
while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) {
WORKLIST_REMOVE(wk);
switch (wk->wk_type) {
@ -7932,16 +7970,26 @@ handle_workitem_freeblocks(freeblks, flags)
case D_FREEWORK:
freework = WK_FREEWORK(wk);
if (freework->fw_lbn <= -UFS_NDADDR)
if (freework->fw_lbn <= -UFS_NDADDR) {
handle_workitem_indirblk(freework);
else
freework_freeblock(freework);
continue;
} else if (prevfreework == NULL) {
trimtype = SINGLETON;
} else if (trimtype == SINGLETON) {
freework_freeblock(prevfreework, STARTFREE);
trimtype = ENDFREE;
} else {
freework_freeblock(prevfreework, CONTINUEFREE);
}
prevfreework = freework;
continue;
default:
panic("handle_workitem_freeblocks: Unknown type %s",
TYPENAME(wk->wk_type));
}
}
if (prevfreework != NULL)
freework_freeblock(prevfreework, trimtype);
if (freeblks->fb_ref != 0) {
freeblks->fb_state &= ~INPROGRESS;
wake_worklist(&freeblks->fb_list);
@ -8080,13 +8128,8 @@ indir_trunc(freework, dbn, lbn)
ufs1_daddr_t *bap1;
ufs2_daddr_t nb, nnb, *bap2;
ufs_lbn_t lbnadd, nlbn;
int i, nblocks, ufs1fmt;
int freedblocks;
int goingaway;
int freedeps;
int needj;
int level;
int cnt;
int nblocks, ufs1fmt, firstfree, trimtype, freedblocks;
int goingaway, freedeps, needj, level, cnt, i;
freeblks = freework->fw_freeblks;
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
@ -8180,6 +8223,7 @@ indir_trunc(freework, dbn, lbn)
* arranges for the current level to be freed when subordinates
* are free when journaling.
*/
firstfree = 1;
for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) {
if (i != NINDIR(fs) - 1) {
if (ufs1fmt)
@ -8215,11 +8259,26 @@ indir_trunc(freework, dbn, lbn)
freedeps++;
}
CTR3(KTR_SUJ,
"indir_trunc: ino %d blkno %jd size %ld",
"indir_trunc: ino %jd blkno %jd size %d",
freeblks->fb_inum, nb, fs->fs_bsize);
if (firstfree) {
if (i == NINDIR(fs) - 1 || nnb == 0) {
trimtype = SINGLETON;
} else {
trimtype = STARTFREE;
firstfree = 0;
}
} else {
if (i == NINDIR(fs) - 1 || nnb == 0) {
trimtype = ENDFREE;
firstfree = 1;
} else {
trimtype = CONTINUEFREE;
}
}
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
fs->fs_bsize, freeblks->fb_inum,
freeblks->fb_vtype, &wkhd);
freeblks->fb_vtype, &wkhd, trimtype);
}
}
if (goingaway) {
@ -8244,7 +8303,7 @@ indir_trunc(freework, dbn, lbn)
if (level == 0)
freeblks->fb_cgwait += freedeps;
if (freework->fw_ref == 0)
freework_freeblock(freework);
freework_freeblock(freework, SINGLETON);
FREE_LOCK(ump);
return;
}
@ -8253,10 +8312,10 @@ indir_trunc(freework, dbn, lbn)
*/
dbn = dbtofsb(fs, dbn);
CTR3(KTR_SUJ,
"indir_trunc 2: ino %d blkno %jd size %ld",
"indir_trunc 2: ino %jd blkno %jd size %d",
freeblks->fb_inum, dbn, fs->fs_bsize);
ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
freeblks->fb_inum, freeblks->fb_vtype, NULL);
freeblks->fb_inum, freeblks->fb_vtype, NULL, SINGLETON);
/* Non SUJ softdep does single-threaded truncations. */
if (freework->fw_blkno == dbn) {
freework->fw_state |= ALLCOMPLETE;

View File

@ -978,6 +978,8 @@ ffs_mountfs(devvp, mp, td)
taskqueue_thread_enqueue, &ump->um_trim_tq);
taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
"%s trim", mp->mnt_stat.f_mntonname);
ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM,
&ump->um_trimlisthashsize);
}
}
@ -1256,6 +1258,7 @@ ffs_unmount(mp, mntflags)
pause("ufsutr", hz);
taskqueue_drain_all(ump->um_trim_tq);
taskqueue_free(ump->um_trim_tq);
free (ump->um_trimhash, M_TRIM);
}
g_topology_lock();
if (ump->um_fsckpid > 0) {

View File

@ -557,6 +557,7 @@ struct freefrag {
long ff_fragsize; /* size of fragment being deleted */
ino_t ff_inum; /* owning inode number */
enum vtype ff_vtype; /* owning inode's file type */
int ff_trimtype; /* trim status when deleted */
};
/*

View File

@ -47,6 +47,7 @@ struct ufs_args {
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_UFSMNT);
MALLOC_DECLARE(M_TRIM);
#endif
struct buf;
@ -63,6 +64,7 @@ struct inodedep;
TAILQ_HEAD(inodedeplst, inodedep);
LIST_HEAD(bmsafemaphd, bmsafemap);
LIST_HEAD(trimlist_hashhead, ffs_blkfree_trim_params);
/*
* This structure describes the UFS specific mount structure data.
@ -101,6 +103,8 @@ struct ufsmount {
u_int um_flags; /* (i) filesystem flags */
u_int um_trim_inflight; /* (a) outstanding trim count */
struct taskqueue *um_trim_tq; /* (c) trim request queue */
struct trimlist_hashhead *um_trimhash; /* (i) trimlist hash table */
u_long um_trimlisthashsize; /* (i) trim hash table size-1 */
/* (c) - below function ptrs */
int (*um_balloc)(struct vnode *, off_t, int, struct ucred *,
int, struct buf **);