MMAP Optimization

Enable the zfs_getpage, zfs_fillpage, zfs_putpage, and zfs_putapage functions. The functions have been modified to make them Linux friendly. ZFS uses these functions to read/write the mmapped pages. Using them from readpage/writepage results in clearer code.

The patch also adds the readpages and writepages interface functions to read/write a list of pages in one function call.

The code change handles the first mmap optimization mentioned in https://github.com/behlendorf/zfs/issues/225

Signed-off-by: Prasad Joshi <pjoshi@stec-inc.com>
Signed-off-by: Brian Behlendorf <behlendorf@llnl.gov>
Issue #255
2011-05-28 02:53:07 +01:00 · 2011-05-28 02:53:07 +01:00 · dde471ef5a
commit dde471ef5a
parent 2a005961a4
4 changed files with 178 additions and 327 deletions
--- a/include/sys/zfs_vnops.h
+++ b/include/sys/zfs_vnops.h
@ -71,6 +71,9 @@ extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
    cred_t *cr);
 extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
    cred_t *cr);
+extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
+extern int zfs_putpage(struct page *page, struct writeback_control *wbc,
+    void *data);

 #ifdef	__cplusplus
 }
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@ -29,6 +29,7 @@
 #include <linux/vfs_compat.h>
 #include <linux/xattr_compat.h>
 #include <linux/exportfs.h>
+#include <linux/writeback.h>

 /* zpl_inode.c */
 extern const struct inode_operations zpl_inode_operations;
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@ -3725,53 +3725,31 @@ zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
 	pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
 	return (0);
 }
+#endif /* HAVE_MMAP */

 /*
- * Push a page out to disk, klustering if possible.
+ * Push a page out to disk
 *
 *	IN:	vp	- file to push page to.
 *		pp	- page to push.
- *		flags	- additional flags.
- *		cr	- credentials of caller.
+ *		off	- start of range pushed.
+ *		len	- len of range pushed.
 *
- *	OUT:	offp	- start of range pushed.
- *		lenp	- len of range pushed.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
- * NOTE: callers must have locked the page to be pushed.  On
- * exit, the page (and all other pages in the kluster) must be
- * unlocked.
+ * NOTE: callers must have locked the page to be pushed.
 */
 /* ARGSUSED */
 static int
-zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
-		size_t *lenp, int flags, cred_t *cr)
+zfs_putapage(struct inode *ip, struct page *pp, u_offset_t off, size_t len)
 {
-	znode_t		*zp = VTOZ(vp);
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	dmu_tx_t	*tx;
-	u_offset_t	off, koff;
-	size_t		len, klen;
-	int		err;
-
-	off = pp->p_offset;
-	len = PAGESIZE;
-	/*
-	 * If our blocksize is bigger than the page size, try to kluster
-	 * multiple pages so that we write a full block (thus avoiding
-	 * a read-modify-write).
-	 */
-	if (off < zp->z_size && zp->z_blksz > PAGESIZE) {
-		klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
-		koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
-		ASSERT(koff <= zp->z_size);
-		if (koff + klen > zp->z_size)
-			klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE);
-		pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
-	}
-	ASSERT3U(btop(len), ==, btopr(len));
+	znode_t    *zp  = ITOZ(ip);
+	zfs_sb_t   *zsb = ITOZSB(ip);
+	dmu_tx_t   *tx;
+	caddr_t	   va;
+	int        err;

 	/*
 	 * Can't push pages past end-of-file.
@ -3780,24 +3758,16 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
 		/* ignore all pages */
 		err = 0;
 		goto out;
-	} else if (off + len > zp->z_size) {
-		int npages = btopr(zp->z_size - off);
-		page_t *trunc;
-
-		page_list_break(&pp, &trunc, npages);
-		/* ignore pages past end of file */
-		if (trunc)
-			pvn_write_done(trunc, flags);
+	} else if (off + len > zp->z_size)
 		len = zp->z_size - off;
-	}

-	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
-	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
+	if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
+	    zfs_owner_overquota(zsb, zp, B_TRUE)) {
 		err = EDQUOT;
 		goto out;
 	}
 top:
-	tx = dmu_tx_create(zfsvfs->z_os);
+	tx = dmu_tx_create(zsb->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, off, len);

 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
@ -3813,52 +3783,38 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
 		goto out;
 	}

-	if (zp->z_blksz <= PAGESIZE) {
-		caddr_t va = zfs_map_page(pp, S_READ);
-		ASSERT3U(len, <=, PAGESIZE);
-		dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
-		zfs_unmap_page(pp, va);
-	} else {
-		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
-	}
+	va = kmap(pp);
+	ASSERT3U(len, <=, PAGESIZE);
+	dmu_write(zsb->z_os, zp->z_id, off, len, va, tx);
+	kunmap(pp);

 	if (err == 0) {
 		uint64_t mtime[2], ctime[2];
 		sa_bulk_attr_t bulk[3];
 		int count = 0;

-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
 		    &mtime, 16);
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
 		    &ctime, 16);
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
 		    &zp->z_pflags, 8);
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
 		    B_TRUE);
-		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
+		zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, off, len, 0);
 	}
 	dmu_tx_commit(tx);

 out:
-	pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
-	if (offp)
-		*offp = off;
-	if (lenp)
-		*lenp = len;
-
 	return (err);
 }

 /*
- * Copy the portion of the file indicated from pages into the file.
- * The pages are stored in a page list attached to the files vnode.
+ * Copy the portion of the file indicated from page into the file.
 *
- *	IN:	vp	- vnode of file to push page data to.
- *		off	- position in file to put data.
- *		len	- amount of data to write.
- *		flags	- flags to control the operation.
- *		cr	- credentials of caller.
- *		ct	- caller context.
+ *	IN:	ip	- inode of file to push page data to.
+ *		wbc	- Unused parameter
+ *		data	- pointer to address_space
 *
 *	RETURN:	0 if success
 *		error code if failure
@ -3867,87 +3823,45 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
 *	vp - ctime|mtime updated
 */
 /*ARGSUSED*/
-static int
-zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr)
+int
+zfs_putpage(struct page *page, struct writeback_control *wbc, void *data)
 {
-	znode_t		*zp = VTOZ(vp);
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	page_t		*pp;
-	size_t		io_len;
-	u_offset_t	io_off;
-	uint_t		blksz;
-	rl_t		*rl;
-	int		error = 0;
+	struct address_space *mapping = data;
+	struct inode         *ip      = mapping->host;
+	znode_t              *zp      = ITOZ(ip);
+	zfs_sb_t             *zsb     = ITOZSB(ip);
+	rl_t		     *rl;
+	u_offset_t	     io_off;
+	size_t		     io_len;
+	size_t		     len;
+	int		     error;

-	ZFS_ENTER(zfsvfs);
+	io_off = page_offset(page);
+	io_len = PAGESIZE;
+
+	ZFS_ENTER(zsb);
 	ZFS_VERIFY_ZP(zp);

-	/*
-	 * Align this request to the file block size in case we kluster.
-	 * XXX - this can result in pretty aggresive locking, which can
-	 * impact simultanious read/write access.  One option might be
-	 * to break up long requests (len == 0) into block-by-block
-	 * operations to get narrower locking.
-	 */
-	blksz = zp->z_blksz;
-	if (ISP2(blksz))
-		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
-	else
-		io_off = 0;
-	if (len > 0 && ISP2(blksz))
-		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
-	else
-		io_len = 0;
-
-	if (io_len == 0) {
-		/*
-		 * Search the entire vp list for pages >= io_off.
-		 */
-		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
-		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
-		goto out;
-	}
 	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);

-	if (off > zp->z_size) {
+	if (io_off > zp->z_size) {
 		/* past end of file */
 		zfs_range_unlock(rl);
-		ZFS_EXIT(zfsvfs);
+		ZFS_EXIT(zsb);
 		return (0);
 	}

 	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);

-	for (off = io_off; io_off < off + len; io_off += io_len) {
-		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
-			pp = page_lookup(vp, io_off,
-			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
-		} else {
-			pp = page_lookup_nowait(vp, io_off,
-			    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
-		}
-
-		if (pp != NULL && pvn_getdirty(pp, flags)) {
-			int err;
-
-			/*
-			 * Found a dirty page to push
-			 */
-			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
-			if (err)
-				error = err;
-		} else {
-			io_len = PAGESIZE;
-		}
-	}
-out:
+	error = zfs_putapage(ip, page, io_off, len);
 	zfs_range_unlock(rl);
-	if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zfsvfs->z_log, zp->z_id);
-	ZFS_EXIT(zfsvfs);
+
+	if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zsb->z_log, zp->z_id);
+	ZFS_EXIT(zsb);
 	return (error);
 }
-#endif /* HAVE_MMAP */
+EXPORT_SYMBOL(zfs_putpage);

 /*ARGSUSED*/
 void
@ -4039,102 +3953,62 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
 	ZFS_EXIT(zfsvfs);
 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
 }
+#endif /* HAVE_MMAP */

 /*
- * If we can't find a page in the cache, we will create a new page
- * and fill it with file data.  For efficiency, we may try to fill
- * multiple pages at once (klustering) to fill up the supplied page
- * list.  Note that the pages to be filled are held with an exclusive
- * lock to prevent access by other threads while they are being filled.
+ * Fill pages with data from the disk.
 */
 static int
-zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
-    caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
+zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
 {
-	znode_t *zp = VTOZ(vp);
-	page_t *pp, *cur_pp;
-	objset_t *os = zp->z_zfsvfs->z_os;
-	u_offset_t io_off, total;
-	size_t io_len;
-	int err;
+	znode_t	    *zp = ITOZ(ip);
+	zfs_sb_t    *zsb = ITOZSB(ip);
+	objset_t    *os;
+	struct page *cur_pp;
+	u_offset_t  io_off, total;
+	size_t      io_len;
+	loff_t      i_size;
+	unsigned    page_idx;
+	int         err;

-	if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
-		/*
-		 * We only have a single page, don't bother klustering
-		 */
-		io_off = off;
-		io_len = PAGESIZE;
-		pp = page_create_va(vp, io_off, io_len,
-		    PG_EXCL | PG_WAIT, seg, addr);
-	} else {
-		/*
-		 * Try to find enough pages to fill the page list
-		 */
-		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
-		    &io_len, off, plsz, 0);
-	}
-	if (pp == NULL) {
-		/*
-		 * The page already exists, nothing to do here.
-		 */
-		*pl = NULL;
-		return (0);
-	}
+	os     = zsb->z_os;
+	io_len = nr_pages << PAGE_CACHE_SHIFT;
+	i_size = i_size_read(ip);
+	io_off = page_offset(pl[0]);
+
+	if (io_off + io_len > i_size)
+		io_len = i_size - io_off;

 	/*
-	 * Fill the pages in the kluster.
+	 * Iterate over list of pages and read each page individually.
 	 */
-	cur_pp = pp;
+	page_idx = 0;
+	cur_pp   = pl[0];
 	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
 		caddr_t va;

-		ASSERT3U(io_off, ==, cur_pp->p_offset);
-		va = zfs_map_page(cur_pp, S_WRITE);
+		va = kmap(cur_pp);
 		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
 		    DMU_READ_PREFETCH);
-		zfs_unmap_page(cur_pp, va);
+		kunmap(cur_pp);
 		if (err) {
-			/* On error, toss the entire kluster */
-			pvn_read_done(pp, B_ERROR);
 			/* convert checksum errors into IO errors */
 			if (err == ECKSUM)
 				err = EIO;
 			return (err);
 		}
-		cur_pp = cur_pp->p_next;
+		cur_pp = pl[++page_idx];
 	}

-	/*
-	 * Fill in the page list array from the kluster starting
-	 * from the desired offset `off'.
-	 * NOTE: the page list will always be null terminated.
-	 */
-	pvn_plist_init(pp, pl, plsz, off, io_len, rw);
-	ASSERT(pl == NULL || (*pl)->p_offset == off);
-
 	return (0);
 }

 /*
- * Return pointers to the pages for the file region [off, off + len]
- * in the pl array.  If plsz is greater than len, this function may
- * also return page pointers from after the specified region
- * (i.e. the region [off, off + plsz]).  These additional pages are
- * only returned if they are already in the cache, or were created as
- * part of a klustered read.
+ * Uses zfs_fillpage to read data from the file and fill the pages.
 *
- *	IN:	vp	- vnode of file to get data from.
- *		off	- position in file to get data from.
- *		len	- amount of data to retrieve.
- *		plsz	- length of provided page list.
- *		seg	- segment to obtain pages for.
- *		addr	- virtual address of fault.
- *		rw	- mode of created pages.
- *		cr	- credentials of caller.
- *		ct	- caller context.
- *
- *	OUT:	protp	- protection mode of created pages.
- *		pl	- list of pages created.
+ *	IN:	ip	 - inode of file to get data from.
+ *		pl	 - list of pages to read
+ *		nr_pages - number of pages to read
 *
 *	RETURN:	0 if success
 *		error code if failure
@ -4143,80 +4017,30 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
 *	vp - atime updated
 */
 /* ARGSUSED */
-static int
-zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
-	page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
-	enum seg_rw rw, cred_t *cr)
+int
+zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
 {
-	znode_t		*zp = VTOZ(vp);
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	page_t		**pl0 = pl;
-	int		err = 0;
+	znode_t	 *zp  = ITOZ(ip);
+	zfs_sb_t *zsb = ITOZSB(ip);
+	int	 err;

-	/* we do our own caching, faultahead is unnecessary */
 	if (pl == NULL)
 		return (0);
-	else if (len > plsz)
-		len = plsz;
-	else
-		len = P2ROUNDUP(len, PAGESIZE);
-	ASSERT(plsz >= len);

-	ZFS_ENTER(zfsvfs);
+	ZFS_ENTER(zsb);
 	ZFS_VERIFY_ZP(zp);

-	if (protp)
-		*protp = PROT_ALL;
+	err = zfs_fillpage(ip, pl, nr_pages);

-	/*
-	 * Loop through the requested range [off, off + len) looking
-	 * for pages.  If we don't find a page, we will need to create
-	 * a new page and fill it with data from the file.
-	 */
-	while (len > 0) {
-		if (*pl = page_lookup(vp, off, SE_SHARED))
-			*(pl+1) = NULL;
-		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
-			goto out;
-		while (*pl) {
-			ASSERT3U((*pl)->p_offset, ==, off);
-			off += PAGESIZE;
-			addr += PAGESIZE;
-			if (len > 0) {
-				ASSERT3U(len, >=, PAGESIZE);
-				len -= PAGESIZE;
-			}
-			ASSERT3U(plsz, >=, PAGESIZE);
-			plsz -= PAGESIZE;
-			pl++;
-		}
-	}
+	if (!err)
+		ZFS_ACCESSTIME_STAMP(zsb, zp);

-	/*
-	 * Fill out the page array with any pages already in the cache.
-	 */
-	while (plsz > 0 &&
-	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
-			off += PAGESIZE;
-			plsz -= PAGESIZE;
-	}
-out:
-	if (err) {
-		/*
-		 * Release any pages we have previously locked.
-		 */
-		while (pl > pl0)
-			page_unlock(*--pl);
-	} else {
-		ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
-	}
-
-	*pl = NULL;
-
-	ZFS_EXIT(zfsvfs);
+	ZFS_EXIT(zsb);
 	return (err);
 }
+EXPORT_SYMBOL(zfs_getpage);

+#ifdef HAVE_MMAP
 /*
 * Request a memory map for a section of a file.  This code interacts
 * with common code and the VM system as follows:
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@ -254,6 +254,60 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	return (error);
 }

+static struct page **
+pages_vector_from_list(struct list_head *pages, unsigned nr_pages)
+{
+	struct page **pl;
+	struct page *t;
+	unsigned page_idx;
+
+	pl = kmalloc(sizeof(*pl) * nr_pages, GFP_NOFS);
+	if (!pl)
+		return ERR_PTR(-ENOMEM);
+
+	page_idx = 0;
+	list_for_each_entry_reverse(t, pages, lru) {
+		pl[page_idx] = t;
+		page_idx++;
+	}
+
+	return pl;
+}
+
+static int
+zpl_readpages(struct file *file, struct address_space *mapping,
+	struct list_head *pages, unsigned nr_pages)
+{
+	struct inode *ip;
+	struct page  **pl;
+	struct page  *p, *n;
+	int          error;
+
+	ip = mapping->host;
+
+	pl = pages_vector_from_list(pages, nr_pages);
+	if (IS_ERR(pl))
+		return PTR_ERR(pl);
+
+	error = -zfs_getpage(ip, pl, nr_pages);
+	if (error)
+		goto error;
+
+	list_for_each_entry_safe_reverse(p, n, pages, lru) {
+
+		list_del(&p->lru);
+
+		flush_dcache_page(p);
+		SetPageUptodate(p);
+		unlock_page(p);
+		page_cache_release(p);
+	}
+
+error:
+	kfree(pl);
+	return error;
+}
+
 /*
 * Populate a page with data for the Linux page cache.  This function is
 * only used to support mmap(2).  There will be an identical copy of the
@ -267,33 +321,14 @@ static int
 zpl_readpage(struct file *filp, struct page *pp)
 {
 	struct inode *ip;
-	loff_t off, i_size;
-	size_t len, wrote;
-	cred_t *cr = CRED();
-	void *pb;
+	struct page *pl[1];
 	int error = 0;

 	ASSERT(PageLocked(pp));
 	ip = pp->mapping->host;
-	off = page_offset(pp);
-	i_size = i_size_read(ip);
-	ASSERT3S(off, <, i_size);
+	pl[0] = pp;

-	crhold(cr);
-	len = MIN(PAGE_CACHE_SIZE, i_size - off);
-
-	pb = kmap(pp);
-
-	/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
-	wrote = zpl_read_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
-	if (wrote != len)
-		error = -EIO;
-
-	if (!error && (len < PAGE_CACHE_SIZE))
-		memset(pb + len, 0, PAGE_CACHE_SIZE - len);
-
-	kunmap(pp);
-	crfree(cr);
+	error = -zfs_getpage(ip, pl, 1);

 	if (error) {
 		SetPageError(pp);
@ -305,47 +340,15 @@ zpl_readpage(struct file *filp, struct page *pp)
 	}

 	unlock_page(pp);
-
-	return (error);
+	return error;
 }

-/*
- * Write out dirty pages to the ARC, this function is only required to
- * support mmap(2).  Mapped pages may be dirtied by memory operations
- * which never call .write().  These dirty pages are kept in sync with
- * the ARC buffers via this hook.
- *
- * Currently this function relies on zpl_write_common() and the O_DIRECT
- * flag to push out the page.  This works but the more correct way is
- * to update zfs_putapage() to be Linux friendly and use that interface.
- */
-static int
-zpl_writepage(struct page *pp, struct writeback_control *wbc)
+int
+zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
-	struct inode *ip;
-	loff_t off, i_size;
-	size_t len, read;
-	cred_t *cr = CRED();
-	void *pb;
-	int error = 0;
+	int error;

-	ASSERT(PageLocked(pp));
-	ip = pp->mapping->host;
-	off = page_offset(pp);
-	i_size = i_size_read(ip);
-
-	crhold(cr);
-	len = MIN(PAGE_CACHE_SIZE, i_size - off);
-
-	pb = kmap(pp);
-
-	/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
-	read = zpl_write_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
-	if (read != len)
-		error = -EIO;
-
-	kunmap(pp);
-	crfree(cr);
+	error = -zfs_putpage(pp, wbc, data);

 	if (error) {
 		SetPageError(pp);
@ -353,16 +356,36 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
 	} else {
 		ClearPageError(pp);
 		SetPageUptodate(pp);
+		flush_dcache_page(pp);
 	}

 	unlock_page(pp);
+	return error;
+}

-	return (error);
+static int
+zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	return write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+}
+
+/*
+ * Write out dirty pages to the ARC, this function is only required to
+ * support mmap(2).  Mapped pages may be dirtied by memory operations
+ * which never call .write().  These dirty pages are kept in sync with
+ * the ARC buffers via this hook.
+ */
+static int
+zpl_writepage(struct page *pp, struct writeback_control *wbc)
+{
+	return zpl_putpage(pp, wbc, pp->mapping);
 }

 const struct address_space_operations zpl_address_space_operations = {
+	.readpages	= zpl_readpages,
 	.readpage	= zpl_readpage,
 	.writepage	= zpl_writepage,
+	.writepages     = zpl_writepages,
 };

 const struct file_operations zpl_file_operations = {