From d7511a40a749d2c42fa6f3fd74c7bdf37ad62a2c Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" <pfg@FreeBSD.org> Date: Mon, 12 Aug 2013 21:34:48 +0000 Subject: [PATCH] Add read-only support for extents in ext2fs. Basic support for extents was implemented by Zheng Liu as part of his Google Summer of Code in 2010. This support is read-only at this time. In addition to extents we also support the huge_file extension for read-only purposes. This works nicely with the additional support for birthtime/nanosec timestamps and dir_index that have been added lately. The implementation may not work for all ext4 filesystems as it doesn't support some features that are being enabled by default on recent linux like flex_bg. Nevertheless, the feature should be very useful for migration or simple access in filesystems that have been converted from ext2/3 or don't use incompatible features. Special thanks to Zheng Liu for his dedication and continued work to support ext2 in FreeBSD. Submitted by: Zheng Liu (lz@) Reviewed by: Mike Ma, Christoph Mallon (previous version) Sponsored by: Google Inc. MFC after: 3 weeks --- sys/conf/files | 1 + sys/fs/ext2fs/ext2_bmap.c | 56 ++++++++++- sys/fs/ext2fs/ext2_dinode.h | 2 + sys/fs/ext2fs/ext2_extents.c | 177 +++++++++++++++++++++++++++++++++ sys/fs/ext2fs/ext2_extents.h | 99 ++++++++++++++++++ sys/fs/ext2fs/ext2_extern.h | 2 +- sys/fs/ext2fs/ext2_inode_cnv.c | 37 +++++-- sys/fs/ext2fs/ext2_subr.c | 51 +++++++++- sys/fs/ext2fs/ext2_vfsops.c | 14 ++- sys/fs/ext2fs/ext2_vnops.c | 128 +++++++++++++++++++++++- sys/fs/ext2fs/ext2fs.h | 8 +- sys/fs/ext2fs/inode.h | 16 ++- sys/modules/ext2fs/Makefile | 6 +- 13 files changed, 565 insertions(+), 32 deletions(-) create mode 100644 sys/fs/ext2fs/ext2_extents.c create mode 100644 sys/fs/ext2fs/ext2_extents.h diff --git a/sys/conf/files b/sys/conf/files index 17ec4050e437..d41fbf40d44e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2714,6 +2714,7 @@ geom/zero/g_zero.c optional geom_zero fs/ext2fs/ext2_alloc.c optional ext2fs fs/ext2fs/ext2_balloc.c optional ext2fs fs/ext2fs/ext2_bmap.c optional ext2fs +fs/ext2fs/ext2_extents.c optional ext2fs fs/ext2fs/ext2_inode.c optional ext2fs fs/ext2fs/ext2_inode_cnv.c optional ext2fs fs/ext2fs/ext2_hash.c optional ext2fs diff --git a/sys/fs/ext2fs/ext2_bmap.c b/sys/fs/ext2fs/ext2_bmap.c index 86a197361c65..73cc0c55d15e 100644 --- a/sys/fs/ext2fs/ext2_bmap.c +++ b/sys/fs/ext2fs/ext2_bmap.c @@ -46,10 +46,14 @@ #include <sys/stat.h> #include <fs/ext2fs/inode.h> +#include <fs/ext2fs/fs.h> #include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_dinode.h> #include <fs/ext2fs/ext2_extern.h> #include <fs/ext2fs/ext2_mount.h> +static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); + /* * Bmap converts the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block @@ -58,7 +62,7 @@ int ext2_bmap(struct vop_bmap_args *ap) { - int32_t blkno; + int64_t blkno; int error; /* @@ -70,12 +74,56 @@ ext2_bmap(struct vop_bmap_args *ap) if (ap->a_bnp == NULL) return (0); - error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, - ap->a_runp, ap->a_runb); + if (VTOI(ap->a_vp)->i_flags & EXT4_EXTENTS) + error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno, + ap->a_runp, ap->a_runb); + else + error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, + ap->a_runp, ap->a_runb); *ap->a_bnp = blkno; return (error); } +/* + * This function converts the logical block number of a file to + * its physical block number on the disk within ext4 extents. + */ +static int +ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) +{ + struct inode *ip; + struct m_ext2fs *fs; + struct ext4_extent *ep; + struct ext4_extent_path path; + daddr_t lbn; + + ip = VTOI(vp); + fs = ip->i_e2fs; + lbn = bn; + + /* + * TODO: need to implement read ahead to improve the performance. + */ + if (runp != NULL) + *runp = 0; + + if (runb != NULL) + *runb = 0; + + ext4_ext_find_extent(fs, ip, lbn, &path); + ep = path.ep_ext; + if (ep == NULL) + return (EIO); + + *bnp = fsbtodb(fs, lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); + + if (*bnp == 0) + *bnp = -1; + + return (0); +} + /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative @@ -91,7 +139,7 @@ ext2_bmap(struct vop_bmap_args *ap) */ int -ext2_bmaparray(struct vnode *vp, int32_t bn, int32_t *bnp, int *runp, int *runb) +ext2_bmaparray(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) { struct inode *ip; struct buf *bp; diff --git a/sys/fs/ext2fs/ext2_dinode.h b/sys/fs/ext2fs/ext2_dinode.h index 1526cb5bf3c5..e492a0858c17 100644 --- a/sys/fs/ext2fs/ext2_dinode.h +++ b/sys/fs/ext2fs/ext2_dinode.h @@ -79,6 +79,8 @@ #define E2DI_HAS_XTIME(ip) (EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, \ EXT2F_ROCOMPAT_EXTRA_ISIZE)) +#define E2DI_HAS_HUGE_FILE(ip) (EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, \ + EXT2F_ROCOMPAT_HUGE_FILE)) /* * Constants relative to the data blocks diff --git a/sys/fs/ext2fs/ext2_extents.c b/sys/fs/ext2fs/ext2_extents.c new file mode 100644 index 000000000000..26e6a222cc94 --- /dev/null +++ b/sys/fs/ext2fs/ext2_extents.c @@ -0,0 +1,177 @@ +/*- + * Copyright (c) 2010 Zheng Liu <lz@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/conf.h> + +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_extents.h> +#include <fs/ext2fs/ext2_extern.h> + +static void ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path + *path, daddr_t lbn) +{ + struct ext4_extent_header *ehp = path->ep_header; + struct ext4_extent_index *l, *r, *m; + + l = (struct ext4_extent_index *)(char *)(ehp + 1); + r = (struct ext4_extent_index *)(char *)(ehp + 1) + ehp->eh_ecount - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (lbn < m->ei_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_index = l - 1; +} + +static void +ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) +{ + struct ext4_extent_header *ehp = path->ep_header; + struct ext4_extent *l, *r, *m; + + if (ehp->eh_ecount == 0) + return; + + l = (struct ext4_extent *)(char *)(ehp + 1); + r = (struct ext4_extent *)(char *)(ehp + 1) + ehp->eh_ecount - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (lbn < m->e_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_ext = l - 1; +} + +/* + * Find a block in ext4 extent cache. + */ +int +ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) +{ + struct ext4_extent_cache *ecp; + int ret = EXT4_EXT_CACHE_NO; + + ecp = &ip->i_ext_cache; + + /* cache is invalid */ + if (ecp->ec_type == EXT4_EXT_CACHE_NO) + return (ret); + + if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) { + ep->e_blk = ecp->ec_blk; + ep->e_start_lo = ecp->ec_start & 0xffffffff; + ep->e_start_hi = ecp->ec_start >> 32 & 0xffff; + ep->e_len = ecp->ec_len; + ret = ecp->ec_type; + } + return (ret); +} + +/* + * Put an ext4_extent structure in ext4 cache. + */ +void +ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type) +{ + struct ext4_extent_cache *ecp; + + ecp = &ip->i_ext_cache; + ecp->ec_type = type; + ecp->ec_blk = ep->e_blk; + ecp->ec_len = ep->e_len; + ecp->ec_start = (daddr_t)ep->e_start_hi << 32 | ep->e_start_lo; +} + +/* + * Find an extent. + */ +struct ext4_extent_path * +ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip, + daddr_t lbn, struct ext4_extent_path *path) +{ + struct vnode *vp; + struct ext4_extent_header *ehp; + uint16_t i; + int error, size; + daddr_t nblk; + + vp = ITOV(ip); + ehp = (struct ext4_extent_header *)(char *)ip->i_db; + + if (ehp->eh_magic != EXT4_EXT_MAGIC) + return (NULL); + + path->ep_header = ehp; + + for (i = ehp->eh_depth; i != 0; --i) { + ext4_ext_binsearch_index(ip, path, lbn); + path->ep_depth = 0; + path->ep_ext = NULL; + + nblk = (daddr_t)path->ep_index->ei_leaf_hi << 32 | + path->ep_index->ei_leaf_lo; + size = blksize(fs, ip, nblk); + if (path->ep_bp != NULL) { + brelse(path->ep_bp); + path->ep_bp = NULL; + } + error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED, + &path->ep_bp); + if (error) { + brelse(path->ep_bp); + path->ep_bp = NULL; + return (NULL); + } + ehp = (struct ext4_extent_header *)path->ep_bp->b_data; + path->ep_header = ehp; + } + + path->ep_depth = i; + path->ep_ext = NULL; + path->ep_index = NULL; + + ext4_ext_binsearch(ip, path, lbn); + return (path); +} diff --git a/sys/fs/ext2fs/ext2_extents.h b/sys/fs/ext2fs/ext2_extents.h new file mode 100644 index 000000000000..89aedc5fead2 --- /dev/null +++ b/sys/fs/ext2fs/ext2_extents.h @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _FS_EXT2FS_EXT2_EXTENTS_H_ +#define _FS_EXT2FS_EXT2_EXTENTS_H_ + +#include <sys/types.h> + +#define EXT4_EXT_MAGIC 0xf30a + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_IN 2 + +/* + * Ext4 file system extent on disk. + */ +struct ext4_extent { + uint32_t e_blk; /* first logical block */ + uint16_t e_len; /* number of blocks */ + uint16_t e_start_hi; /* high 16 bits of physical block */ + uint32_t e_start_lo; /* low 32 bits of physical block */ +}; + +/* + * Extent index on disk. + */ +struct ext4_extent_index { + uint32_t ei_blk; /* indexes logical blocks */ + uint32_t ei_leaf_lo; /* points to physical block of the + * next level */ + uint16_t ei_leaf_hi; /* high 16 bits of physical block */ + uint16_t ei_unused; +}; + +/* + * Extent tree header. + */ +struct ext4_extent_header { + uint16_t eh_magic; /* magic number: 0xf30a */ + uint16_t eh_ecount; /* number of valid entries */ + uint16_t eh_max; /* capacity of store in entries */ + uint16_t eh_depth; /* the depth of extent tree */ + uint32_t eh_gen; /* generation of extent tree */ +}; + +/* + * Save cached extent. + */ +struct ext4_extent_cache { + daddr_t ec_start; /* extent start */ + uint32_t ec_blk; /* logical block */ + uint32_t ec_len; + uint32_t ec_type; +}; + +/* + * Save path to some extent. + */ +struct ext4_extent_path { + uint16_t ep_depth; + struct buf *ep_bp; + struct ext4_extent *ep_ext; + struct ext4_extent_index *ep_index; + struct ext4_extent_header *ep_header; +}; + +struct inode; +struct m_ext2fs; +int ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *); +void ext4_ext_put_cache(struct inode *, struct ext4_extent *, int); +struct ext4_extent_path *ext4_ext_find_extent(struct m_ext2fs *fs, + struct inode *, daddr_t, struct ext4_extent_path *); + +#endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */ diff --git a/sys/fs/ext2fs/ext2_extern.h b/sys/fs/ext2fs/ext2_extern.h index f9c87cb67821..859a97860f93 100644 --- a/sys/fs/ext2fs/ext2_extern.h +++ b/sys/fs/ext2fs/ext2_extern.h @@ -57,7 +57,7 @@ int ext2_blkatoff(struct vnode *, off_t, char **, struct buf **); void ext2_blkfree(struct inode *, int32_t, long); int32_t ext2_blkpref(struct inode *, e2fs_lbn_t, int, int32_t *, int32_t); int ext2_bmap(struct vop_bmap_args *); -int ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *); +int ext2_bmaparray(struct vnode *, int32_t, int64_t *, int *, int *); void ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int); void ext2_dirbad(struct inode *ip, doff_t offset, char *how); void ext2_ei2i(struct ext2fs_dinode *, struct inode *); diff --git a/sys/fs/ext2fs/ext2_inode_cnv.c b/sys/fs/ext2fs/ext2_inode_cnv.c index cade4a6869dc..c26784b3ffc5 100644 --- a/sys/fs/ext2fs/ext2_inode_cnv.c +++ b/sys/fs/ext2fs/ext2_inode_cnv.c @@ -32,6 +32,7 @@ #include <sys/stat.h> #include <sys/vnode.h> +#include <fs/ext2fs/fs.h> #include <fs/ext2fs/inode.h> #include <fs/ext2fs/ext2fs.h> #include <fs/ext2fs/ext2_dinode.h> @@ -44,22 +45,34 @@ void ext2_print_inode(struct inode *in) { int i; + struct ext4_extent_header *ehp; + struct ext4_extent *ep; printf( "Inode: %5ju", (uintmax_t)in->i_number); printf( /* "Inode: %5d" */ " Type: %10s Mode: 0x%o Flags: 0x%x Version: %d\n", "n/a", in->i_mode, in->i_flags, in->i_gen); - printf( "User: %5lu Group: %5lu Size: %lu\n", - (unsigned long)in->i_uid, (unsigned long)in->i_gid, - (unsigned long)in->i_size); - printf( "Links: %3d Blockcount: %d\n", - in->i_nlink, in->i_blocks); + printf("User: %5u Group: %5u Size: %ju\n", + in->i_uid, in->i_gid, (uintmax_t)in->i_size); + printf("Links: %3d Blockcount: %ju\n", + in->i_nlink, (uintmax_t)in->i_blocks); printf( "ctime: 0x%x", in->i_ctime); printf( "atime: 0x%x", in->i_atime); printf( "mtime: 0x%x", in->i_mtime); - printf( "BLOCKS: "); - for(i=0; i < (in->i_blocks <= 24 ? ((in->i_blocks+1)/2): 12); i++) - printf("%d ", in->i_db[i]); + if (E2DI_HAS_XTIME(in)) + printf("crtime %#x ", in->i_birthtime); + printf("BLOCKS:"); + for (i = 0; i < (in->i_blocks <= 24 ? (in->i_blocks + 1) / 2 : 12); i++) + printf(" %d", in->i_db[i]); + printf("\n"); + printf("Extents:\n"); + ehp = (struct ext4_extent_header *)in->i_db; + printf("Header (magic 0x%x entries %d max %d depth %d gen %d)\n", + ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, + ehp->eh_gen); + ep = (struct ext4_extent *)(char *)(ehp + 1); + printf("Index (blk %d len %d start_lo %d start_hi %d)\n", ep->e_blk, + ep->e_len, ep->e_start_lo, ep->e_start_hi); printf("\n"); } @@ -96,6 +109,11 @@ ext2_ei2i(struct ext2fs_dinode *ei, struct inode *ip) ip->i_flags |= (ei->e2di_flags & EXT2_IMMUTABLE) ? SF_IMMUTABLE : 0; ip->i_flags |= (ei->e2di_flags & EXT2_NODUMP) ? UF_NODUMP : 0; ip->i_blocks = ei->e2di_nblock; + if (E2DI_HAS_HUGE_FILE(ip)) { + ip->i_blocks |= (uint64_t)ei->e2di_nblock_high << 32; + if (ei->e2di_flags & EXT4_HUGE_FILE) + ip->i_blocks = fsbtodb(ip->i_e2fs, ip->i_blocks); + } ip->i_gen = ei->e2di_gen; ip->i_uid = ei->e2di_uid; ip->i_gid = ei->e2di_gid; @@ -138,7 +156,8 @@ ext2_i2ei(struct inode *ip, struct ext2fs_dinode *ei) ei->e2di_flags |= (ip->i_flags & SF_APPEND) ? EXT2_APPEND: 0; ei->e2di_flags |= (ip->i_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE: 0; ei->e2di_flags |= (ip->i_flags & UF_NODUMP) ? EXT2_NODUMP: 0; - ei->e2di_nblock = ip->i_blocks; + ei->e2di_nblock = ip->i_blocks & 0xffffffff; + ei->e2di_nblock_high = ip->i_blocks >> 32 & 0xffff; ei->e2di_gen = ip->i_gen; ei->e2di_uid = ip->i_uid; ei->e2di_gid = ip->i_gid; diff --git a/sys/fs/ext2fs/ext2_subr.c b/sys/fs/ext2fs/ext2_subr.c index df6e430f0f08..cd4a06b6b8b9 100644 --- a/sys/fs/ext2fs/ext2_subr.c +++ b/sys/fs/ext2fs/ext2_subr.c @@ -50,10 +50,11 @@ #include <fs/ext2fs/ext2_extern.h> #include <fs/ext2fs/ext2fs.h> #include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extents.h> +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/ext2_dinode.h> #ifdef KDB -#include <fs/ext2fs/ext2_mount.h> - void ext2_checkoverlap(struct buf *, struct inode *); #endif @@ -70,21 +71,63 @@ ext2_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp) struct buf *bp; e2fs_lbn_t lbn; int bsize, error; + daddr_t newblk; + struct ext4_extent *ep; + struct ext4_extent_path path; ip = VTOI(vp); fs = ip->i_e2fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); - *bpp = NULL; - if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { + + /* + * The EXT4_EXTENTS requires special treatment, otherwise we can + * fall back to the normal path. + */ + if (!(ip->i_flags & EXT4_EXTENTS)) + goto normal; + + memset(&path, 0, sizeof(path)); + if (ext4_ext_find_extent(fs, ip, lbn, &path) == NULL) + goto normal; + ep = path.ep_ext; + if (ep == NULL) + goto normal; + + newblk = lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32); + + if (path.ep_bp != NULL) { + brelse(path.ep_bp); + path.ep_bp = NULL; + } + error = bread(ip->i_devvp, fsbtodb(fs, newblk), bsize, NOCRED, &bp); + if (error != 0) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); + /* + * If EXT4_EXTENTS is enabled we would get a wrong offset so + * reset b_offset here. + */ + bp->b_offset = lbn * bsize; *bpp = bp; return (0); + +normal: + if (*bpp == NULL) { + if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { + brelse(bp); + return (error); + } + if (res) + *res = (char *)bp->b_data + blkoff(fs, offset); + *bpp = bp; + } + return (0); } #ifdef KDB diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index 1c8e1aa3dc19..b4683564bd8f 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -397,9 +397,11 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, if (es->e2fs_rev == E2FS_REV0 || !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) fs->e2fs_maxfilesize = 0x7fffffff; - else - fs->e2fs_maxfilesize = 0x7fffffffffffffff; - + else { + fs->e2fs_maxfilesize = 0xffffffffffff; + if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) + fs->e2fs_maxfilesize = 0x7fffffffffffffff; + } if (es->e4fs_flags & E2FS_UNSIGNED_HASH) { fs->e2fs_uhash = 3; } else if ((es->e4fs_flags & E2FS_SIGNED_HASH) == 0) { @@ -961,8 +963,12 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) * Now we want to make sure that block pointers for unused * blocks are zeroed out - ext2_balloc depends on this * although for regular files and directories only + * + * If EXT4_EXTENTS flag is enabled, unused blocks aren't + * zeroed out because we could corrupt the extent tree. */ - if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { + if (!(ip->i_flags & EXT4_EXTENTS) && + (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize; for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index c2f8a8fe9f96..e26a31dbe1f5 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -84,6 +84,8 @@ static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); static void ext2_itimes_locked(struct vnode *); +static int ext4_ext_read(struct vop_read_args *); +static int ext2_ind_read(struct vop_read_args *); static vop_access_t ext2_access; static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); @@ -1327,7 +1329,7 @@ ext2_strategy(struct vop_strategy_args *ap) struct vnode *vp = ap->a_vp; struct inode *ip; struct bufobj *bo; - int32_t blkno; + int64_t blkno; int error; ip = VTOI(vp); @@ -1604,6 +1606,29 @@ bad: */ static int ext2_read(struct vop_read_args *ap) +{ + struct vnode *vp; + struct inode *ip; + int error; + + vp = ap->a_vp; + ip = VTOI(vp); + + /*EXT4_EXT_LOCK(ip);*/ + if (ip->i_flags & EXT4_EXTENTS) + error = ext4_ext_read(ap); + else + error = ext2_ind_read(ap); + /*EXT4_EXT_UNLOCK(ip);*/ + return (error); +} + + +/* + * Vnode op for reading. + */ +static int +ext2_ind_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; @@ -1757,6 +1782,107 @@ ext2_ioctl(struct vop_ioctl_args *ap) } } +/* + * this function handles ext4 extents block mapping + */ +static int +ext4_ext_read(struct vop_read_args *ap) +{ + struct vnode *vp; + struct inode *ip; + struct uio *uio; + struct m_ext2fs *fs; + struct buf *bp; + struct ext4_extent nex, *ep; + struct ext4_extent_path path; + daddr_t lbn, newblk; + off_t bytesinfile; + int cache_type; + ssize_t orig_resid; + int error; + long size, xfersize, blkoffset; + + vp = ap->a_vp; + ip = VTOI(vp); + uio = ap->a_uio; + memset(&path, 0, sizeof(path)); + + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); + fs = ip->i_e2fs; + if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) + return (EOVERFLOW); + + while (uio->uio_resid > 0) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + size = blksize(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + xfersize = fs->e2fs_fsize - blkoffset; + xfersize = MIN(xfersize, uio->uio_resid); + xfersize = MIN(xfersize, bytesinfile); + + /* get block from ext4 extent cache */ + cache_type = ext4_ext_in_cache(ip, lbn, &nex); + switch (cache_type) { + case EXT4_EXT_CACHE_NO: + ext4_ext_find_extent(fs, ip, lbn, &path); + ep = path.ep_ext; + if (ep == NULL) + return (EIO); + + ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN); + + newblk = lbn - ep->e_blk + (ep->e_start_lo | + (daddr_t)ep->e_start_hi << 32); + + if (path.ep_bp != NULL) { + brelse(path.ep_bp); + path.ep_bp = NULL; + } + break; + + case EXT4_EXT_CACHE_GAP: + /* block has not been allocated yet */ + return (0); + + case EXT4_EXT_CACHE_IN: + newblk = lbn - nex.e_blk + (nex.e_start_lo | + (daddr_t)nex.e_start_hi << 32); + break; + + default: + panic("%s: invalid cache type", __func__); + } + + error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) { + bqrelse(bp); + break; + } + xfersize = size; + } + error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio); + bqrelse(bp); + if (error) + return (error); + } + + return (0); +} + /* * Vnode op for writing. */ diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h index 7b16f0fcd40b..b562287cac52 100644 --- a/sys/fs/ext2fs/ext2fs.h +++ b/sys/fs/ext2fs/ext2fs.h @@ -201,12 +201,18 @@ struct csum { * - EXT2F_ROCOMPAT_SPARSESUPER * - EXT2F_ROCOMPAT_LARGEFILE * - EXT2F_INCOMPAT_FTYPE + * + * We partially (read-only) support the following EXT4 features: + * - EXT2F_ROCOMPAT_HUGE_FILE + * - EXT2F_ROCOMPAT_EXTRA_ISIZE + * - EXT2F_INCOMPAT_EXTENTS */ #define EXT2F_COMPAT_SUPP 0x0000 #define EXT2F_ROCOMPAT_SUPP (EXT2F_ROCOMPAT_SPARSESUPER | \ EXT2F_ROCOMPAT_LARGEFILE | \ EXT2F_ROCOMPAT_EXTRA_ISIZE) -#define EXT2F_INCOMPAT_SUPP EXT2F_INCOMPAT_FTYPE +#define EXT2F_INCOMPAT_SUPP (EXT2F_INCOMPAT_FTYPE | \ + EXT2F_INCOMPAT_EXTENTS) /* Assume that user mode programs are passing in an ext2fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test diff --git a/sys/fs/ext2fs/inode.h b/sys/fs/ext2fs/inode.h index d939987965e9..4ffb2bbb66ab 100644 --- a/sys/fs/ext2fs/inode.h +++ b/sys/fs/ext2fs/inode.h @@ -38,9 +38,13 @@ #ifndef _FS_EXT2FS_INODE_H_ #define _FS_EXT2FS_INODE_H_ +#include <sys/param.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/queue.h> +#include <fs/ext2fs/ext2_extents.h> + /* * This must agree with the definition in <ufs/ufs/dir.h>. */ @@ -86,7 +90,10 @@ struct inode { /* Fields from struct dinode in UFS. */ uint16_t i_mode; /* IFMT, permissions; see below. */ int16_t i_nlink; /* File link count. */ + uint32_t i_uid; /* File owner. */ + uint32_t i_gid; /* File group. */ uint64_t i_size; /* File byte count. */ + uint64_t i_blocks; /* Blocks actually held. */ int32_t i_atime; /* Last access time. */ int32_t i_mtime; /* Last modified time. */ int32_t i_ctime; /* Last inode change time. */ @@ -95,13 +102,12 @@ struct inode { int32_t i_atimensec; /* Last access time. */ int32_t i_ctimensec; /* Last inode change time. */ int32_t i_birthnsec; /* Inode creation time. */ + uint32_t i_gen; /* Generation number. */ + uint32_t i_flags; /* Status flags (chflags). */ uint32_t i_db[NDADDR]; /* Direct disk blocks. */ uint32_t i_ib[NIADDR]; /* Indirect disk blocks. */ - uint32_t i_flags; /* Status flags (chflags). */ - uint32_t i_blocks; /* Blocks actually held. */ - uint32_t i_gen; /* Generation number. */ - uint32_t i_uid; /* File owner. */ - uint32_t i_gid; /* File group. */ + + struct ext4_extent_cache i_ext_cache; /* cache for ext4 extent */ }; /* diff --git a/sys/modules/ext2fs/Makefile b/sys/modules/ext2fs/Makefile index a38a63e245e4..fc10ab0928ba 100644 --- a/sys/modules/ext2fs/Makefile +++ b/sys/modules/ext2fs/Makefile @@ -3,8 +3,8 @@ .PATH: ${.CURDIR}/../../fs/ext2fs KMOD= ext2fs SRCS= opt_ddb.h opt_directio.h opt_quota.h opt_suiddir.h vnode_if.h \ - ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_hash.c ext2_htree.c \ - ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c ext2_vfsops.c \ - ext2_vnops.c + ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c ext2_hash.c \ + ext2_htree.c ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \ + ext2_vfsops.c ext2_vnops.c .include <bsd.kmod.mk>