From d7511a40a749d2c42fa6f3fd74c7bdf37ad62a2c Mon Sep 17 00:00:00 2001
From: "Pedro F. Giffuni" <pfg@FreeBSD.org>
Date: Mon, 12 Aug 2013 21:34:48 +0000
Subject: [PATCH] Add read-only support for extents in ext2fs.

Basic support for extents was implemented by Zheng Liu as part
of his Google Summer of Code in 2010. This support is read-only
at this time.

In addition to extents we also support the huge_file extension
for read-only purposes. This works nicely with the additional
support for birthtime/nanosec timestamps and dir_index that
have been added lately.

The implementation may not work for all ext4 filesystems as
it doesn't support some features that are enabled by default
on recent Linux systems, such as flex_bg. Nevertheless, the
feature should be very useful for migration or simple access
to filesystems that have been converted from ext2/3 or don't
use incompatible features.

Special thanks to Zheng Liu for his dedication and continued
work to support ext2 in FreeBSD.

Submitted by:	Zheng Liu (lz@)
Reviewed by:	Mike Ma, Christoph Mallon (previous version)
Sponsored by:	Google Inc.
MFC after:	3 weeks
---
 sys/conf/files                 |   1 +
 sys/fs/ext2fs/ext2_bmap.c      |  56 ++++++++++-
 sys/fs/ext2fs/ext2_dinode.h    |   2 +
 sys/fs/ext2fs/ext2_extents.c   | 177 +++++++++++++++++++++++++++++++++
 sys/fs/ext2fs/ext2_extents.h   |  99 ++++++++++++++++++
 sys/fs/ext2fs/ext2_extern.h    |   2 +-
 sys/fs/ext2fs/ext2_inode_cnv.c |  37 +++++--
 sys/fs/ext2fs/ext2_subr.c      |  51 +++++++++-
 sys/fs/ext2fs/ext2_vfsops.c    |  14 ++-
 sys/fs/ext2fs/ext2_vnops.c     | 128 +++++++++++++++++++++++-
 sys/fs/ext2fs/ext2fs.h         |   8 +-
 sys/fs/ext2fs/inode.h          |  16 ++-
 sys/modules/ext2fs/Makefile    |   6 +-
 13 files changed, 565 insertions(+), 32 deletions(-)
 create mode 100644 sys/fs/ext2fs/ext2_extents.c
 create mode 100644 sys/fs/ext2fs/ext2_extents.h

diff --git a/sys/conf/files b/sys/conf/files
index 17ec4050e437..d41fbf40d44e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2714,6 +2714,7 @@ geom/zero/g_zero.c		optional geom_zero
 fs/ext2fs/ext2_alloc.c		optional ext2fs
 fs/ext2fs/ext2_balloc.c		optional ext2fs
 fs/ext2fs/ext2_bmap.c		optional ext2fs
+fs/ext2fs/ext2_extents.c	optional ext2fs
 fs/ext2fs/ext2_inode.c		optional ext2fs
 fs/ext2fs/ext2_inode_cnv.c	optional ext2fs
 fs/ext2fs/ext2_hash.c		optional ext2fs
diff --git a/sys/fs/ext2fs/ext2_bmap.c b/sys/fs/ext2fs/ext2_bmap.c
index 86a197361c65..73cc0c55d15e 100644
--- a/sys/fs/ext2fs/ext2_bmap.c
+++ b/sys/fs/ext2fs/ext2_bmap.c
@@ -46,10 +46,14 @@
 #include <sys/stat.h>
 
 #include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2_mount.h>
 
+static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *);
+
 /*
  * Bmap converts the logical block number of a file to its physical block
  * number on the disk. The conversion is done by using the logical block
@@ -58,7 +62,7 @@
 int
 ext2_bmap(struct vop_bmap_args *ap)
 {
-	int32_t blkno;
+	int64_t blkno;
 	int error;
 
 	/*
@@ -70,12 +74,56 @@ ext2_bmap(struct vop_bmap_args *ap)
 	if (ap->a_bnp == NULL)
 		return (0);
 
-	error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno,
-	    ap->a_runp, ap->a_runb);
+	if (VTOI(ap->a_vp)->i_flags & EXT4_EXTENTS)
+		error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno,
+		    ap->a_runp, ap->a_runb);
+	else
+		error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno,
+		    ap->a_runp, ap->a_runb);
 	*ap->a_bnp = blkno;
 	return (error);
 }
 
+/*
+ * Map logical block `bn' of an extent-mapped (EXT4_EXTENTS) file to a
+ * device block stored in *bnp; a result of 0 is reported as -1.  Read-ahead
+ * is not implemented, so *runp and *runb are zeroed when they are non-NULL.
+ * Returns 0 on success or EIO when no extent could be located.
+ */
+static int
+ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb)
+{
+	/* The lookup reads path.ep_bp, so the path must start zeroed. */
+	struct ext4_extent_path path = { .ep_bp = NULL };
+	struct ext4_extent *ep;
+	struct m_ext2fs *fs;
+	struct inode *ip;
+	daddr_t lbn;
+	int error = 0;
+
+	ip = VTOI(vp);
+	fs = ip->i_e2fs;
+	lbn = bn;
+	if (runp != NULL)
+		*runp = 0;
+	if (runb != NULL)
+		*runb = 0;
+
+	if (ext4_ext_find_extent(fs, ip, lbn, &path) == NULL ||
+	    (ep = path.ep_ext) == NULL) {
+		error = EIO;
+	} else {
+		*bnp = fsbtodb(fs, lbn - ep->e_blk +
+		    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32));
+		if (*bnp == 0)
+			*bnp = -1;
+	}
+	/* ep may point into path.ep_bp, so release the buffer last. */
+	if (path.ep_bp != NULL)
+		brelse(path.ep_bp);
+	return (error);
+}
+
 /*
  * Indirect blocks are now on the vnode for the file.  They are given negative
  * logical block numbers.  Indirect blocks are addressed by the negative
@@ -91,7 +139,7 @@ ext2_bmap(struct vop_bmap_args *ap)
  */
 
 int
-ext2_bmaparray(struct vnode *vp, int32_t bn, int32_t *bnp, int *runp, int *runb)
+ext2_bmaparray(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb)
 {
 	struct inode *ip;
 	struct buf *bp;
diff --git a/sys/fs/ext2fs/ext2_dinode.h b/sys/fs/ext2fs/ext2_dinode.h
index 1526cb5bf3c5..e492a0858c17 100644
--- a/sys/fs/ext2fs/ext2_dinode.h
+++ b/sys/fs/ext2fs/ext2_dinode.h
@@ -79,6 +79,8 @@
 
 #define E2DI_HAS_XTIME(ip)	(EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs,	\
 				    EXT2F_ROCOMPAT_EXTRA_ISIZE))
+#define E2DI_HAS_HUGE_FILE(ip)	(EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs,	\
+				    EXT2F_ROCOMPAT_HUGE_FILE))
 
 /*
  * Constants relative to the data blocks
diff --git a/sys/fs/ext2fs/ext2_extents.c b/sys/fs/ext2fs/ext2_extents.c
new file mode 100644
index 000000000000..26e6a222cc94
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_extents.c
@@ -0,0 +1,177 @@
+/*-
+ * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_extents.h>
+#include <fs/ext2fs/ext2_extern.h>
+
+static void ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path
+		*path, daddr_t lbn)
+{
+	struct ext4_extent_header *ehp = path->ep_header;
+	struct ext4_extent_index *l, *r, *m;
+
+	/* Pick the last entry with ei_blk <= lbn, never before the first. */
+	l = (struct ext4_extent_index *)(char *)(ehp + 1) + 1;
+	r = (struct ext4_extent_index *)(char *)(ehp + 1) + ehp->eh_ecount - 1;
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		if (lbn < m->ei_blk)
+			r = m - 1;
+		else
+			l = m + 1;
+	}
+	path->ep_index = l - 1;
+}
+
+/* Set ep_ext to the last extent with e_blk <= lbn; untouched if none. */
+static void
+ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn)
+{
+	struct ext4_extent_header *ehp = path->ep_header;
+	struct ext4_extent *first = (struct ext4_extent *)(char *)(ehp + 1);
+	struct ext4_extent *l, *r, *m;
+
+	if (ehp->eh_ecount == 0 || lbn < first->e_blk)
+		return;
+	l = first + 1;
+	r = first + ehp->eh_ecount - 1;
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		if (lbn < m->e_blk)
+			r = m - 1;
+		else
+			l = m + 1;
+	}
+	path->ep_ext = l - 1;
+}
+
+/*
+ * Look up lbn in the extent cached in the inode.
+ */
+int
+ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
+{
+	struct ext4_extent_cache *cache = &ip->i_ext_cache;
+
+	/* An invalid cache can never produce a hit. */
+	if (cache->ec_type == EXT4_EXT_CACHE_NO)
+		return (EXT4_EXT_CACHE_NO);
+
+	/* Miss when lbn lies outside [ec_blk, ec_blk + ec_len). */
+	if (lbn < cache->ec_blk || lbn >= cache->ec_blk + cache->ec_len)
+		return (EXT4_EXT_CACHE_NO);
+
+	/* Hit: rebuild the extent in *ep from the cached values. */
+	ep->e_blk = cache->ec_blk;
+	ep->e_len = cache->ec_len;
+	ep->e_start_lo = cache->ec_start & 0xffffffff;
+	ep->e_start_hi = cache->ec_start >> 32 & 0xffff;
+
+	return (cache->ec_type);
+}
+
+/*
+ * Record extent ep, tagged with the given type, in the inode's extent cache.
+ */
+void
+ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type)
+{
+	struct ext4_extent_cache *cache = &ip->i_ext_cache;
+
+	/* Join the 16-bit high and 32-bit low halves of the start block. */
+	cache->ec_start = (daddr_t)ep->e_start_hi << 32 | ep->e_start_lo;
+	cache->ec_blk = ep->e_blk;
+	cache->ec_len = ep->e_len;
+	cache->ec_type = type;
+}
+
+/*
+ * Walk the extent tree from the root in ip->i_db down to a leaf and search
+ * it for lbn.  Returns `path' (the caller releases a non-NULL ep_bp; ep_ext
+ * is what the leaf search chose, possibly NULL), or NULL with no buffer held
+ * on a bad magic, an empty index node or a read error.
+ */
+struct ext4_extent_path *
+ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip,
+		     daddr_t lbn, struct ext4_extent_path *path)
+{
+	struct ext4_extent_header *ehp;
+	uint16_t i;
+	int error, size;
+	daddr_t nblk;
+
+	ehp = (struct ext4_extent_header *)(char *)ip->i_db;
+	if (ehp->eh_magic != EXT4_EXT_MAGIC)
+		return (NULL);
+	path->ep_header = ehp;
+	path->ep_ext = NULL;
+	for (i = ehp->eh_depth; i != 0; --i) {
+		/* An empty index node has no child to descend into. */
+		if (ehp->eh_ecount == 0)
+			goto fail;
+		ext4_ext_binsearch_index(ip, path, lbn);
+		nblk = (daddr_t)path->ep_index->ei_leaf_hi << 32 |
+		    path->ep_index->ei_leaf_lo;
+		size = blksize(fs, ip, nblk);
+		if (path->ep_bp != NULL) {
+			brelse(path->ep_bp);
+			path->ep_bp = NULL;
+		}
+		error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED,
+			    &path->ep_bp);
+		if (error)
+			goto fail;
+		ehp = (struct ext4_extent_header *)path->ep_bp->b_data;
+		if (ehp->eh_magic != EXT4_EXT_MAGIC)
+			goto fail;
+		path->ep_header = ehp;
+	}
+	path->ep_depth = i;
+	path->ep_index = NULL;
+	ext4_ext_binsearch(ip, path, lbn);
+	return (path);
+fail:
+	if (path->ep_bp != NULL) {
+		brelse(path->ep_bp);
+		path->ep_bp = NULL;
+	}
+	return (NULL);
+}
diff --git a/sys/fs/ext2fs/ext2_extents.h b/sys/fs/ext2fs/ext2_extents.h
new file mode 100644
index 000000000000..89aedc5fead2
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_extents.h
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _FS_EXT2FS_EXT2_EXTENTS_H_
+#define _FS_EXT2FS_EXT2_EXTENTS_H_
+
+#include <sys/types.h>
+
+#define EXT4_EXT_MAGIC  0xf30a
+
+#define EXT4_EXT_CACHE_NO	0
+#define EXT4_EXT_CACHE_GAP	1
+#define EXT4_EXT_CACHE_IN	2
+
+/*
+ * Ext4 file system extent on disk.
+ */
+struct ext4_extent {
+	uint32_t e_blk;	/* first logical block */
+	uint16_t e_len;	/* number of blocks */
+	uint16_t e_start_hi;	/* high 16 bits of physical block */
+	uint32_t e_start_lo;	/* low 32 bits of physical block */
+};
+
+/*
+ * Extent index on disk.
+ */
+struct ext4_extent_index {
+	uint32_t ei_blk;	/* indexes logical blocks */
+	uint32_t ei_leaf_lo;	/* points to physical block of the
+				 * next level */
+	uint16_t ei_leaf_hi;	/* high 16 bits of physical block */
+	uint16_t ei_unused;
+};
+
+/*
+ * Extent tree header.
+ */
+struct ext4_extent_header {
+	uint16_t eh_magic;	/* magic number: 0xf30a */
+	uint16_t eh_ecount;	/* number of valid entries */
+	uint16_t eh_max;	/* capacity of store in entries */
+	uint16_t eh_depth;	/* the depth of extent tree */
+	uint32_t eh_gen;	/* generation of extent tree */
+};
+
+/*
+ * In-memory copy of one extent, kept in the inode (i_ext_cache).
+ */
+struct ext4_extent_cache {
+	daddr_t	ec_start;	/* physical start block of the extent */
+	uint32_t ec_blk;	/* first logical block of the extent */
+	uint32_t ec_len;	/* length, copied from e_len */
+	uint32_t ec_type;	/* one of EXT4_EXT_CACHE_* */
+};
+
+/*
+ * State of one extent tree lookup, filled in by ext4_ext_find_extent().
+ */
+struct ext4_extent_path {
+	uint16_t ep_depth;	/* 0 once the lookup reaches the leaf */
+	struct buf *ep_bp;	/* last tree block read; user must brelse() */
+	struct ext4_extent *ep_ext;	/* extent chosen by the leaf search */
+	struct ext4_extent_index *ep_index;	/* NULL after a lookup */
+	struct ext4_extent_header *ep_header;	/* node being searched */
+};
+
+struct inode;
+struct m_ext2fs;
+int	ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *);
+void	ext4_ext_put_cache(struct inode *, struct ext4_extent *, int);
+struct ext4_extent_path *ext4_ext_find_extent(struct m_ext2fs *fs,
+    struct inode *, daddr_t, struct ext4_extent_path *);
+
+#endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */
diff --git a/sys/fs/ext2fs/ext2_extern.h b/sys/fs/ext2fs/ext2_extern.h
index f9c87cb67821..859a97860f93 100644
--- a/sys/fs/ext2fs/ext2_extern.h
+++ b/sys/fs/ext2fs/ext2_extern.h
@@ -57,7 +57,7 @@ int	ext2_blkatoff(struct vnode *, off_t, char **, struct buf **);
 void	ext2_blkfree(struct inode *, int32_t, long);
 int32_t	ext2_blkpref(struct inode *, e2fs_lbn_t, int, int32_t *, int32_t);
 int	ext2_bmap(struct vop_bmap_args *);
-int	ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *);
+int	ext2_bmaparray(struct vnode *, int32_t, int64_t *, int *, int *);
 void	ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
 void	ext2_dirbad(struct inode *ip, doff_t offset, char *how);
 void	ext2_ei2i(struct ext2fs_dinode *, struct inode *);
diff --git a/sys/fs/ext2fs/ext2_inode_cnv.c b/sys/fs/ext2fs/ext2_inode_cnv.c
index cade4a6869dc..c26784b3ffc5 100644
--- a/sys/fs/ext2fs/ext2_inode_cnv.c
+++ b/sys/fs/ext2fs/ext2_inode_cnv.c
@@ -32,6 +32,7 @@
 #include <sys/stat.h>
 #include <sys/vnode.h>
 
+#include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
@@ -44,22 +45,34 @@ void
 ext2_print_inode(struct inode *in)
 {
 	int i;
+	struct ext4_extent_header *ehp;
+	struct ext4_extent *ep;
 
 	printf( "Inode: %5ju", (uintmax_t)in->i_number);
 	printf( /* "Inode: %5d" */
 		" Type: %10s Mode: 0x%o Flags: 0x%x  Version: %d\n",
 		"n/a", in->i_mode, in->i_flags, in->i_gen);
-	printf( "User: %5lu Group: %5lu  Size: %lu\n",
-		(unsigned long)in->i_uid, (unsigned long)in->i_gid,
-		(unsigned long)in->i_size);
-	printf( "Links: %3d Blockcount: %d\n",
-		in->i_nlink, in->i_blocks);
+	printf("User: %5u Group: %5u  Size: %ju\n",
+	    in->i_uid, in->i_gid, (uintmax_t)in->i_size);
+	printf("Links: %3d Blockcount: %ju\n",
+	    in->i_nlink, (uintmax_t)in->i_blocks);
 	printf( "ctime: 0x%x", in->i_ctime);
 	printf( "atime: 0x%x", in->i_atime);
 	printf( "mtime: 0x%x", in->i_mtime);
-	printf( "BLOCKS: ");
-	for(i=0; i < (in->i_blocks <= 24 ? ((in->i_blocks+1)/2): 12); i++)
-		printf("%d ", in->i_db[i]);
+	if (E2DI_HAS_XTIME(in))
+		printf("crtime %#x ", in->i_birthtime);
+	printf("BLOCKS:");
+	for (i = 0; i < (in->i_blocks <= 24 ? (in->i_blocks + 1) / 2 : 12); i++)
+		printf("  %d", in->i_db[i]);
+	printf("\n");
+	printf("Extents:\n");
+	ehp = (struct ext4_extent_header *)in->i_db;
+	printf("Header (magic 0x%x entries %d max %d depth %d gen %d)\n",
+	    ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth,
+	    ehp->eh_gen);
+	ep = (struct ext4_extent *)(char *)(ehp + 1);
+	printf("Index (blk %d len %d start_lo %d start_hi %d)\n", ep->e_blk,
+	    ep->e_len, ep->e_start_lo, ep->e_start_hi);
 	printf("\n");
 }
 
@@ -96,6 +109,11 @@ ext2_ei2i(struct ext2fs_dinode *ei, struct inode *ip)
 	ip->i_flags |= (ei->e2di_flags & EXT2_IMMUTABLE) ? SF_IMMUTABLE : 0;
 	ip->i_flags |= (ei->e2di_flags & EXT2_NODUMP) ? UF_NODUMP : 0;
 	ip->i_blocks = ei->e2di_nblock;
+	if (E2DI_HAS_HUGE_FILE(ip)) {
+		ip->i_blocks |= (uint64_t)ei->e2di_nblock_high << 32;
+		if (ei->e2di_flags & EXT4_HUGE_FILE)
+		      ip->i_blocks = fsbtodb(ip->i_e2fs, ip->i_blocks);
+	}
 	ip->i_gen = ei->e2di_gen;
 	ip->i_uid = ei->e2di_uid;
 	ip->i_gid = ei->e2di_gid;
@@ -138,7 +156,8 @@ ext2_i2ei(struct inode *ip, struct ext2fs_dinode *ei)
 	ei->e2di_flags |= (ip->i_flags & SF_APPEND) ? EXT2_APPEND: 0;
 	ei->e2di_flags |= (ip->i_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE: 0;
 	ei->e2di_flags |= (ip->i_flags & UF_NODUMP) ? EXT2_NODUMP: 0;
-	ei->e2di_nblock = ip->i_blocks;
+	ei->e2di_nblock = ip->i_blocks & 0xffffffff;
+	ei->e2di_nblock_high = ip->i_blocks >> 32 & 0xffff;
 	ei->e2di_gen = ip->i_gen;
 	ei->e2di_uid = ip->i_uid;
 	ei->e2di_gid = ip->i_gid;
diff --git a/sys/fs/ext2fs/ext2_subr.c b/sys/fs/ext2fs/ext2_subr.c
index df6e430f0f08..cd4a06b6b8b9 100644
--- a/sys/fs/ext2fs/ext2_subr.c
+++ b/sys/fs/ext2fs/ext2_subr.c
@@ -50,10 +50,11 @@
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extents.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2_dinode.h>
 
 #ifdef KDB
-#include <fs/ext2fs/ext2_mount.h>
-
 void	ext2_checkoverlap(struct buf *, struct inode *);
 #endif
 
@@ -70,21 +71,63 @@ ext2_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
 	struct buf *bp;
 	e2fs_lbn_t lbn;
 	int bsize, error;
+	daddr_t newblk;
+	struct ext4_extent *ep;
+	struct ext4_extent_path path;
 
 	ip = VTOI(vp);
 	fs = ip->i_e2fs;
 	lbn = lblkno(fs, offset);
 	bsize = blksize(fs, ip, lbn);
-
 	*bpp = NULL;
-	if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) {
+
+	/*
+	 * The EXT4_EXTENTS requires special treatment, otherwise we can
+	 * fall back to the normal path.
+	 */
+	if (!(ip->i_flags & EXT4_EXTENTS))
+		goto normal;
+
+	memset(&path, 0, sizeof(path));
+	if (ext4_ext_find_extent(fs, ip, lbn, &path) == NULL)
+		goto normal;
+	ep = path.ep_ext;
+	if (ep == NULL)
+		goto normal;
+
+	newblk = lbn - ep->e_blk +
+	    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32);
+
+	if (path.ep_bp != NULL) {
+		brelse(path.ep_bp);
+		path.ep_bp = NULL;
+	}
+	error = bread(ip->i_devvp, fsbtodb(fs, newblk), bsize, NOCRED, &bp);
+	if (error != 0) {
 		brelse(bp);
 		return (error);
 	}
 	if (res)
 		*res = (char *)bp->b_data + blkoff(fs, offset);
+	/*
+	 * If EXT4_EXTENTS is enabled we would get a wrong offset so
+	 * reset b_offset here.
+	 */
+	bp->b_offset = lbn * bsize;
 	*bpp = bp;
 	return (0);
+
+normal:
+	if (*bpp == NULL) {
+		if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) {
+			brelse(bp);
+			return (error);
+		}
+		if (res)
+			*res = (char *)bp->b_data + blkoff(fs, offset);
+		*bpp = bp;
+	}
+	return (0);
 }
 
 #ifdef KDB
diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c
index 1c8e1aa3dc19..b4683564bd8f 100644
--- a/sys/fs/ext2fs/ext2_vfsops.c
+++ b/sys/fs/ext2fs/ext2_vfsops.c
@@ -397,9 +397,11 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
 	if (es->e2fs_rev == E2FS_REV0 ||
 	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
 		fs->e2fs_maxfilesize = 0x7fffffff;
-	else
-		fs->e2fs_maxfilesize = 0x7fffffffffffffff;
-
+	else {
+		fs->e2fs_maxfilesize = 0xffffffffffff;
+		if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
+			fs->e2fs_maxfilesize = 0x7fffffffffffffff;
+	}
 	if (es->e4fs_flags & E2FS_UNSIGNED_HASH) {
 		fs->e2fs_uhash = 3;
 	} else if ((es->e4fs_flags & E2FS_SIGNED_HASH) == 0) {
@@ -961,8 +963,12 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 	 * Now we want to make sure that block pointers for unused
 	 * blocks are zeroed out - ext2_balloc depends on this
 	 * although for regular files and directories only
+	 *
+	 * If EXT4_EXTENTS flag is enabled, unused blocks aren't
+	 * zeroed out because we could corrupt the extent tree.
 	 */
-	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
+	if (!(ip->i_flags & EXT4_EXTENTS) &&
+	    (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
 		used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize;
 		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
 			ip->i_db[i] = 0;
diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c
index c2f8a8fe9f96..e26a31dbe1f5 100644
--- a/sys/fs/ext2fs/ext2_vnops.c
+++ b/sys/fs/ext2fs/ext2_vnops.c
@@ -84,6 +84,8 @@
 
 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
 static void ext2_itimes_locked(struct vnode *);
+static int ext4_ext_read(struct vop_read_args *);
+static int ext2_ind_read(struct vop_read_args *);
 
 static vop_access_t	ext2_access;
 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
@@ -1327,7 +1329,7 @@ ext2_strategy(struct vop_strategy_args *ap)
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip;
 	struct bufobj *bo;
-	int32_t blkno;
+	int64_t blkno;
 	int error;
 
 	ip = VTOI(vp);
@@ -1604,6 +1606,29 @@ bad:
  */
 static int
 ext2_read(struct vop_read_args *ap)
+{
+	struct inode *ip;
+	int error;
+
+	ip = VTOI(ap->a_vp);
+	/*
+	 * Hand the request to ext4_ext_read() when EXT4_EXTENTS is set in
+	 * the inode flags and to ext2_ind_read() otherwise.  No extent
+	 * lock is taken around the call.
+	 */
+	if ((ip->i_flags & EXT4_EXTENTS) != 0)
+		error = ext4_ext_read(ap);
+	else
+		error = ext2_ind_read(ap);
+	return (error);
+}
+
+
+/*
+ * Vnode op for reading.
+ */
+static int
+ext2_ind_read(struct vop_read_args *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
@@ -1757,6 +1782,107 @@ ext2_ioctl(struct vop_ioctl_args *ap)
 	}
 }
 
+/*
+ * Read from a file whose blocks are mapped by ext4 extents.  Each logical
+ * block is translated through the inode's extent cache or, on a miss, the
+ * extent tree, and is then read straight from the device vnode.
+ */
+static int
+ext4_ext_read(struct vop_read_args *ap)
+{
+	struct vnode *vp;
+	struct inode *ip;
+	struct uio *uio;
+	struct m_ext2fs *fs;
+	struct buf *bp;
+	struct ext4_extent nex, *ep;
+	struct ext4_extent_path path;
+	daddr_t lbn, newblk;
+	off_t bytesinfile;
+	ssize_t orig_resid;
+	int cache_type, error;
+	long size, xfersize, blkoffset;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	uio = ap->a_uio;
+	memset(&path, 0, sizeof(path));
+
+	orig_resid = uio->uio_resid;
+	KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__));
+	if (orig_resid == 0)
+		return (0);
+	KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__));
+	fs = ip->i_e2fs;
+	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
+		return (EOVERFLOW);
+
+	while (uio->uio_resid > 0) {
+		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
+			break;
+		lbn = lblkno(fs, uio->uio_offset);
+		size = blksize(fs, ip, lbn);
+		blkoffset = blkoff(fs, uio->uio_offset);
+
+		xfersize = fs->e2fs_fsize - blkoffset;
+		xfersize = MIN(xfersize, uio->uio_resid);
+		xfersize = MIN(xfersize, bytesinfile);
+
+		/* get block from ext4 extent cache */
+		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
+		switch (cache_type) {
+		case EXT4_EXT_CACHE_NO:
+			ep = NULL;
+			if (ext4_ext_find_extent(fs, ip, lbn, &path) != NULL)
+				ep = path.ep_ext;
+			if (ep != NULL) {
+				ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);
+				nex = *ep;
+			}
+			/* ep may point into path.ep_bp; release it last. */
+			if (path.ep_bp != NULL) {
+				brelse(path.ep_bp);
+				path.ep_bp = NULL;
+			}
+			if (ep == NULL)
+				return (EIO);
+			/* FALLTHROUGH */
+		case EXT4_EXT_CACHE_IN:
+			newblk = lbn - nex.e_blk + (nex.e_start_lo |
+			    (daddr_t)nex.e_start_hi << 32);
+			break;
+
+		case EXT4_EXT_CACHE_GAP:
+			/* block has not been allocated yet */
+			return (0);
+
+		default:
+			panic("%s: invalid cache type", __func__);
+		}
+
+		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+
+		size -= bp->b_resid;
+		if (size < xfersize) {
+			if (size == 0) {
+				bqrelse(bp);
+				break;
+			}
+			xfersize = size;
+		}
+		error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio);
+		bqrelse(bp);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
 /*
  * Vnode op for writing.
  */
diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h
index 7b16f0fcd40b..b562287cac52 100644
--- a/sys/fs/ext2fs/ext2fs.h
+++ b/sys/fs/ext2fs/ext2fs.h
@@ -201,12 +201,18 @@ struct csum {
  * - EXT2F_ROCOMPAT_SPARSESUPER
  * - EXT2F_ROCOMPAT_LARGEFILE
  * - EXT2F_INCOMPAT_FTYPE
+ *
+ * We partially (read-only) support the following EXT4 features:
+ * - EXT2F_ROCOMPAT_HUGE_FILE
+ * - EXT2F_ROCOMPAT_EXTRA_ISIZE
+ * - EXT2F_INCOMPAT_EXTENTS
  */
 #define EXT2F_COMPAT_SUPP		0x0000
 #define EXT2F_ROCOMPAT_SUPP		(EXT2F_ROCOMPAT_SPARSESUPER | \
 					 EXT2F_ROCOMPAT_LARGEFILE | \
 					 EXT2F_ROCOMPAT_EXTRA_ISIZE)
-#define EXT2F_INCOMPAT_SUPP		EXT2F_INCOMPAT_FTYPE
+#define EXT2F_INCOMPAT_SUPP		(EXT2F_INCOMPAT_FTYPE |	\
+					 EXT2F_INCOMPAT_EXTENTS)
 
 /* Assume that user mode programs are passing in an ext2fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
diff --git a/sys/fs/ext2fs/inode.h b/sys/fs/ext2fs/inode.h
index d939987965e9..4ffb2bbb66ab 100644
--- a/sys/fs/ext2fs/inode.h
+++ b/sys/fs/ext2fs/inode.h
@@ -38,9 +38,13 @@
 #ifndef _FS_EXT2FS_INODE_H_
 #define	_FS_EXT2FS_INODE_H_
 
+#include <sys/param.h>
 #include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/queue.h>
 
+#include <fs/ext2fs/ext2_extents.h>
+
 /*
  * This must agree with the definition in <ufs/ufs/dir.h>.
  */
@@ -86,7 +90,10 @@ struct inode {
 	/* Fields from struct dinode in UFS. */
 	uint16_t	i_mode;		/* IFMT, permissions; see below. */
 	int16_t		i_nlink;	/* File link count. */
+	uint32_t	i_uid;		/* File owner. */
+	uint32_t	i_gid;		/* File group. */
 	uint64_t	i_size;		/* File byte count. */
+	uint64_t	i_blocks;	/* Blocks actually held. */
 	int32_t		i_atime;	/* Last access time. */
 	int32_t		i_mtime;	/* Last modified time. */
 	int32_t		i_ctime;	/* Last inode change time. */
@@ -95,13 +102,12 @@ struct inode {
 	int32_t		i_atimensec;	/* Last access time. */
 	int32_t		i_ctimensec;	/* Last inode change time. */
 	int32_t		i_birthnsec;	/* Inode creation time. */
+	uint32_t	i_gen;		/* Generation number. */
+	uint32_t	i_flags;	/* Status flags (chflags). */
 	uint32_t	i_db[NDADDR];	/* Direct disk blocks. */
 	uint32_t	i_ib[NIADDR];	/* Indirect disk blocks. */
-	uint32_t	i_flags;	/* Status flags (chflags). */
-	uint32_t	i_blocks;	/* Blocks actually held. */
-	uint32_t	i_gen;		/* Generation number. */
-	uint32_t	i_uid;		/* File owner. */
-	uint32_t	i_gid;		/* File group. */
+
+	struct ext4_extent_cache i_ext_cache; /* cache for ext4 extent */
 };
 
 /*
diff --git a/sys/modules/ext2fs/Makefile b/sys/modules/ext2fs/Makefile
index a38a63e245e4..fc10ab0928ba 100644
--- a/sys/modules/ext2fs/Makefile
+++ b/sys/modules/ext2fs/Makefile
@@ -3,8 +3,8 @@
 .PATH:	${.CURDIR}/../../fs/ext2fs
 KMOD=	ext2fs
 SRCS=	opt_ddb.h opt_directio.h opt_quota.h opt_suiddir.h vnode_if.h \
-	ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_hash.c ext2_htree.c \
-	ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c ext2_vfsops.c \
-	ext2_vnops.c
+	ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c ext2_hash.c \
+	ext2_htree.c ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \
+	ext2_vfsops.c ext2_vnops.c
 
 .include <bsd.kmod.mk>