Add kernel side support for BIO_DELETE/TRIM on UFS.

The FS_TRIM fs flag indicates that the administrator has requested that
TRIM commands be issued for the volume. UFS will only send the command
to the disk if the disk reports the GEOM::candelete attribute.

Since the disk queue is reordered, a data block is marked as free in the
bitmap only after the TRIM command has completed. Due to the need to sleep
waiting for I/O to finish, the TRIM bio_done routine schedules a taskqueue
task to set the bitmap bit.

Based on the patch by:	mckusick
Reviewed by:	mckusick, pjd
Tested by:	pho
MFC after:	1 month
This commit is contained in:
Konstantin Belousov 2010-12-29 12:25:28 +00:00
parent d2d6c59245
commit 8c2a54de80
4 changed files with 117 additions and 2 deletions

View File

@ -80,9 +80,12 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <security/audit/audit.h>
#include <geom/geom.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@ -92,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ffs/softdep.h>
typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
int size, int rsize);
@ -99,6 +103,11 @@ typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int, int);
static ufs2_daddr_t
ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int);
/*
 * Block-free helpers: ffs_blkfree_cg() has the signature of the public
 * ffs_blkfree(); the two trim routines are the bio completion handler
 * and the taskqueue handler used when a free is preceded by BIO_DELETE.
 */
static void ffs_blkfree_cg(struct ufsmount *, struct fs *,
struct vnode *, ufs2_daddr_t, long, ino_t,
struct workhead *);
static void ffs_blkfree_trim_completed(struct bio *);
static void ffs_blkfree_trim_task(void *ctx, int pending __unused);
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
@ -1831,8 +1840,8 @@ gotit:
* free map. If a fragment is deallocated, a possible
* block reassembly is checked.
*/
void
ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
static void
ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
@ -1964,6 +1973,95 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
bdwrite(bp);
}
/*
 * Declare the "ffs_trim" taskqueue.  It is referenced further down as
 * taskqueue_ffs_trim when a deferred block free is enqueued.
 */
TASKQUEUE_DEFINE_THREAD(ffs_trim);
/*
 * Saved arguments of one ffs_blkfree() call.  The structure is
 * allocated in ffs_blkfree(), travels with the BIO_DELETE request in
 * bio_caller2, and is released by ffs_blkfree_trim_task() once the
 * block has been handed to ffs_blkfree_cg().
 */
struct ffs_blkfree_trim_params {
struct task task;		/* set up by ffs_blkfree_trim_completed() */
struct ufsmount *ump;		/* mount the block belongs to */
struct vnode *devvp;		/* devvp argument of ffs_blkfree() */
ufs2_daddr_t bno;		/* bno argument of ffs_blkfree() */
long size;			/* size argument; also used as bio_length */
ino_t inum;			/* inum argument of ffs_blkfree() */
struct workhead *pdephd;	/* &dephd, or NULL if caller passed none */
struct workhead dephd;		/* receives the caller's work list entries */
};
static void
ffs_blkfree_trim_task(ctx, pending)
void *ctx;
int pending;
{
struct ffs_blkfree_trim_params *tp;
tp = ctx;
ffs_blkfree_cg(tp->ump, tp->ump->um_fs, tp->devvp, tp->bno, tp->size,
tp->inum, tp->pdephd);
vn_finished_secondary_write(UFSTOVFS(tp->ump));
free(tp, M_TEMP);
}
/*
 * bio_done handler for the BIO_DELETE request issued by ffs_blkfree().
 *
 * The bio is destroyed here and the saved parameters, carried in
 * bio_caller2, are queued on the ffs_trim taskqueue so that
 * ffs_blkfree_trim_task() performs the actual free.
 */
static void
ffs_blkfree_trim_completed(bip)
	struct bio *bip;
{
	struct ffs_blkfree_trim_params *params;

	/* Pick up the saved arguments before the bio is destroyed. */
	params = bip->bio_caller2;
	g_destroy_bio(bip);

	TASK_INIT(&params->task, 0, ffs_blkfree_trim_task, params);
	taskqueue_enqueue(taskqueue_ffs_trim, &params->task);
}
void
ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
struct workhead *dephd;
{
struct mount *mp;
struct bio *bip;
struct ffs_blkfree_trim_params *tp;
if (!ump->um_candelete) {
ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd);
return;
}
/*
* Postpone the set of the free bit in the cg bitmap until the
* BIO_DELETE is completed. Otherwise, due to disk queue
* reordering, TRIM might be issued after we reuse the block
* and write some new data into it.
*/
tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK);
tp->ump = ump;
tp->devvp = devvp;
tp->bno = bno;
tp->size = size;
tp->inum = inum;
if (dephd != NULL) {
LIST_INIT(&tp->dephd);
LIST_SWAP(dephd, &tp->dephd, worklist, wk_list);
tp->pdephd = &tp->dephd;
} else
tp->pdephd = NULL;
bip = g_alloc_bio();
bip->bio_cmd = BIO_DELETE;
bip->bio_offset = dbtob(fsbtodb(fs, bno));
bip->bio_done = ffs_blkfree_trim_completed;
bip->bio_length = size;
bip->bio_caller2 = tp;
mp = UFSTOVFS(ump);
vn_start_secondary_write(NULL, &mp, 0);
g_io_request(bip, (struct g_consumer *)devvp->v_bufobj.bo_private);
}
#ifdef INVARIANTS
/*
* Verify allocation of a block or fragment. Returns true if block or

View File

@ -895,6 +895,21 @@ ffs_mountfs(devvp, mp, td)
mp->mnt_stat.f_mntonname);
#endif
}
/*
 * The superblock requests TRIM.  Query the GEOM::candelete attribute
 * through cp and record the answer in um_candelete.  Warn when the
 * attribute reads as zero or cannot be read; in the latter case
 * force the flag off so that no BIO_DELETE is issued.
 */
if ((fs->fs_flags & FS_TRIM) != 0) {
size = sizeof(int);
if (g_io_getattr("GEOM::candelete", cp, &size,
&ump->um_candelete) == 0) {
if (!ump->um_candelete)
printf(
"WARNING: %s: TRIM flag on fs but disk does not support TRIM\n",
mp->mnt_stat.f_mntonname);
} else {
printf(
"WARNING: %s: TRIM flag on fs but cannot get whether disk supports TRIM\n",
mp->mnt_stat.f_mntonname);
ump->um_candelete = 0;
}
}
ump->um_mountp = mp;
ump->um_dev = dev;

View File

@ -417,6 +417,7 @@ CTASSERT(sizeof(struct fs) == 1376);
#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
#define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */
/*
* Macros to access bits in the fs_active array.

View File

@ -95,6 +95,7 @@ struct ufsmount {
time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
char um_qflags[MAXQUOTAS]; /* quota specific flags */
int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */
int um_candelete; /* devvp supports TRIM */
int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **);
int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **);
int (*um_truncate)(struct vnode *, off_t, int, struct ucred *, struct thread *);