Use VOP_ADVISE() with POSIX_FADV_DONTNEED instead of IO_DIRECT to
implement not double-caching for reads from vnode-backed md devices. Use VOP_ADVISE() similarly instead of !IO_DIRECT unsimilarly for writes. Add a "cache" option to mdconfig to allow changing the default of not caching. This depends on a recent commit to fix VOP_ADVISE(). A previous version had optimizations for sequential i/o's (merge the i/o's and only uncache for discontiguous i/o's and for full blocks), but optimizations and knowledge of block boundaries belong in VOP_ADVISE(). Read-ahead should also be handled better, by supporting it in md and discarding it in VOP_ADVISE(). POSIX_FADV_DONTNEED is ignored by zfs, but so is IO_DIRECT. POSIX_FADV_DONTNEED works better than IO_DIRECT if it is not ignored, since it only discards from the buffer cache immediately, while IO_DIRECT also discards from the page cache immediately. IO_DIRECT was not used for writes since it was claimed to be too slow, but most of the slowness for writes is from doing them synchronously by default. Non-synchronous writes still deadlock in many cases. IO_DIRECT only has a special implementation for ffs reads with DIRECTIO configured. Otherwise, if it is not ignored, then it uses the buffer and page caches normally except for discarding everything after each i/o, and then it has much the same overheads as POSIX_FADV_DONTNEED. The overheads for reading with ffs and DIRECTIO were similar in tests of md. Reviewed by: kib
This commit is contained in:
parent
f15290be5b
commit
de7f82e077
@ -37,7 +37,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd August 28, 2017
|
||||
.Dd December 21, 2018
|
||||
.Dt MDCONFIG 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -206,6 +206,32 @@ backed devices: avoid
|
||||
.Dv IO_SYNC
|
||||
for increased performance but
|
||||
at the risk of deadlocking the entire kernel.
|
||||
.It Oo Cm no Oc Ns Cm cache
|
||||
For
|
||||
.Cm vnode
|
||||
backed devices: enable/disable caching of data in system caches.
|
||||
The default is to not cache.
|
||||
.Pp
|
||||
Accesses via the device are converted to accesses via the vnode.
|
||||
The caching policy for the vnode is used initially.
|
||||
This is normally to cache.
|
||||
This caching policy is retained if the
|
||||
.Cm cache
|
||||
option is used.
|
||||
Otherwise, caching is limited
|
||||
by releasing data from caches soon after each access.
|
||||
The release has the same semantics as the
|
||||
.Dv POSIX_FADV_DONTNEED
|
||||
feature of
|
||||
.Xr posix_fadvise 2 .
|
||||
The result is that with normal (non-zfs) caching,
|
||||
buffers are released from the buffer cache soon after they are constructed,
|
||||
but their data is kept in the page cache at lower priority.
|
||||
.Pp
|
||||
The
|
||||
.Cm cache
|
||||
option tends to waste memory by giving unwanted double caching,
|
||||
but it saves time if there is memory to spare.
|
||||
.It Oo Cm no Oc Ns Cm reserve
|
||||
Allocate and reserve all needed storage from the start, rather than as needed.
|
||||
.It Oo Cm no Oc Ns Cm cluster
|
||||
|
@ -88,7 +88,7 @@ usage(void)
|
||||
" mdconfig -l [-v] [-n] [-f file] [-u unit]\n"
|
||||
" mdconfig file\n");
|
||||
fprintf(stderr, "\t\ttype = {malloc, vnode, swap}\n");
|
||||
fprintf(stderr, "\t\toption = {cluster, compress, force,\n");
|
||||
fprintf(stderr, "\t\toption = {cache, cluster, compress, force,\n");
|
||||
fprintf(stderr, "\t\t readonly, reserve, ro, verify}\n");
|
||||
fprintf(stderr, "\t\tsize = %%d (512 byte blocks), %%db (B),\n");
|
||||
fprintf(stderr, "\t\t %%dk (kB), %%dm (MB), %%dg (GB), \n");
|
||||
@ -178,6 +178,10 @@ main(int argc, char **argv)
|
||||
mdio.md_options |= MD_ASYNC;
|
||||
else if (!strcmp(optarg, "noasync"))
|
||||
mdio.md_options &= ~MD_ASYNC;
|
||||
else if (!strcmp(optarg, "cache"))
|
||||
mdio.md_options |= MD_CACHE;
|
||||
else if (!strcmp(optarg, "nocache"))
|
||||
mdio.md_options &= ~MD_CACHE;
|
||||
else if (!strcmp(optarg, "cluster"))
|
||||
mdio.md_options |= MD_CLUSTER;
|
||||
else if (!strcmp(optarg, "nocluster"))
|
||||
|
@ -880,7 +880,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
|
||||
struct buf *pb;
|
||||
bus_dma_segment_t *vlist;
|
||||
struct thread *td;
|
||||
off_t iolen, len, zerosize;
|
||||
off_t iolen, iostart, len, zerosize;
|
||||
int ma_offs, npages;
|
||||
|
||||
switch (bp->bio_cmd) {
|
||||
@ -983,13 +983,10 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
|
||||
auio.uio_iov = &aiov;
|
||||
auio.uio_iovcnt = 1;
|
||||
}
|
||||
/*
|
||||
* When reading set IO_DIRECT to try to avoid double-caching
|
||||
* the data. When writing IO_DIRECT is not optimal.
|
||||
*/
|
||||
iostart = auio.uio_offset;
|
||||
if (auio.uio_rw == UIO_READ) {
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
|
||||
error = VOP_READ(vp, &auio, 0, sc->cred);
|
||||
VOP_UNLOCK(vp, 0);
|
||||
} else {
|
||||
(void) vn_start_write(vp, &mp, V_WAIT);
|
||||
@ -1002,6 +999,11 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
|
||||
sc->flags &= ~MD_VERIFY;
|
||||
}
|
||||
|
||||
/* When MD_CACHE is not set, try to avoid double-caching the data. */
|
||||
if (error == 0 && (sc->flags & MD_CACHE) == 0)
|
||||
VOP_ADVISE(vp, iostart, auio.uio_offset - 1,
|
||||
POSIX_FADV_DONTNEED);
|
||||
|
||||
if (pb != NULL) {
|
||||
pmap_qremove((vm_offset_t)pb->b_data, npages);
|
||||
if (error == 0) {
|
||||
@ -1464,7 +1466,8 @@ mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td)
|
||||
sc->fwheads = mdr->md_fwheads;
|
||||
snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju",
|
||||
(uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid);
|
||||
sc->flags = mdr->md_options & (MD_FORCE | MD_ASYNC | MD_VERIFY);
|
||||
sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE |
|
||||
MD_VERIFY);
|
||||
if (!(flags & FWRITE))
|
||||
sc->flags |= MD_READONLY;
|
||||
sc->vnode = nd.ni_vp;
|
||||
@ -2184,6 +2187,9 @@ g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
|
||||
g_conf_printf_escaped(sb, "%s", mp->file);
|
||||
sbuf_printf(sb, "</file>\n");
|
||||
}
|
||||
if (mp->type == MD_VNODE)
|
||||
sbuf_printf(sb, "%s<cache>%s</cache>\n", indent,
|
||||
(mp->flags & MD_CACHE) == 0 ? "off": "on");
|
||||
sbuf_printf(sb, "%s<label>", indent);
|
||||
g_conf_printf_escaped(sb, "%s", mp->label);
|
||||
sbuf_printf(sb, "</label>\n");
|
||||
|
@ -92,5 +92,6 @@ struct md_ioctl {
|
||||
#define MD_FORCE 0x20 /* Don't try to prevent foot-shooting */
|
||||
#define MD_ASYNC 0x40 /* Asynchronous mode */
|
||||
#define MD_VERIFY 0x80 /* Open file with O_VERIFY (vnode only) */
|
||||
#define MD_CACHE 0x100 /* Cache vnode data */
|
||||
|
||||
#endif /* _SYS_MDIOCTL_H_*/
|
||||
|
Loading…
Reference in New Issue
Block a user