diff --git a/lib/libufs/Makefile b/lib/libufs/Makefile index c1948b3a0f0f..c80183d3b428 100644 --- a/lib/libufs/Makefile +++ b/lib/libufs/Makefile @@ -5,7 +5,7 @@ LIB= ufs SHLIBDIR?= /lib SHLIB_MAJOR= 6 -SRCS= block.c cgroup.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c +SRCS= block.c cgroup.c crc32.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c INCS= libufs.h MAN= bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3 @@ -18,7 +18,7 @@ MLINKS+= ufs_disk_close.3 ufs_disk_fillout.3 MLINKS+= ufs_disk_close.3 ufs_disk_fillout_blank.3 MLINKS+= ufs_disk_close.3 ufs_disk_write.3 -.PATH: ${SRCTOP}/sys/ufs/ffs +.PATH: ${SRCTOP}/sys/libkern ${SRCTOP}/sys/ufs/ffs WARNS?= 2 diff --git a/lib/libufs/cgroup.c b/lib/libufs/cgroup.c index dfb10d3feb94..622df059bb10 100644 --- a/lib/libufs/cgroup.c +++ b/lib/libufs/cgroup.c @@ -214,6 +214,11 @@ cgwrite1(struct uufsd *disk, int c) struct fs *fs; fs = &disk->d_fs; + if ((fs->fs_metackhash & CK_CYLGRP) != 0) { + disk->d_cg.cg_ckhash = 0; + disk->d_cg.cg_ckhash = + calculate_crc32c(~0L, (void *)&disk->d_cg, fs->fs_cgsize); + } if (bwrite(disk, fsbtodb(fs, cgtod(fs, c)), disk->d_cgunion.d_buf, fs->fs_bsize) == -1) { ERROR(disk, "unable to write cylinder group"); diff --git a/lib/libufs/libufs.h b/lib/libufs/libufs.h index ddc5c9b6b055..4a79207a519d 100644 --- a/lib/libufs/libufs.h +++ b/lib/libufs/libufs.h @@ -144,6 +144,11 @@ int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); +/* + * crc32.c + */ +uint32_t calculate_crc32c(uint32_t, const void *, size_t); + __END_DECLS #endif /* __LIBUFS_H__ */ diff --git a/sbin/dumpfs/dumpfs.c b/sbin/dumpfs/dumpfs.c index cea9a95ba4ae..e163b4b69747 100644 --- a/sbin/dumpfs/dumpfs.c +++ b/sbin/dumpfs/dumpfs.c @@ -276,6 +276,24 @@ dumpfs(const char *name) if (fsflags != 0) printf("unknown flags (%#x)", fsflags); putchar('\n'); + printf("check hashes\t"); + fsflags = 
afs.fs_metackhash; + if (fsflags == 0) + printf("none"); + if (fsflags & CK_SUPERBLOCK) + printf("superblock "); + if (fsflags & CK_CYLGRP) + printf("cylinder-groups "); + if (fsflags & CK_INODE) + printf("inodes "); + if (fsflags & CK_INDIR) + printf("indirect-blocks "); + if (fsflags & CK_DIR) + printf("directories "); + fsflags &= ~(CK_SUPERBLOCK | CK_CYLGRP | CK_INODE | CK_INDIR | CK_DIR); + if (fsflags != 0) + printf("unknown flags (%#x)", fsflags); + putchar('\n'); printf("fsmnt\t%s\n", afs.fs_fsmnt); printf("volname\t%s\tswuid\t%ju\tprovidersize\t%ju\n", afs.fs_volname, (uintmax_t)afs.fs_swuid, diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c index f4da123b18f9..16d960d155af 100644 --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "fsck.h" @@ -349,6 +350,20 @@ flush(int fd, struct bufarea *bp) if (!bp->b_dirty) return; + /* + * Calculate any needed check hashes. + */ + switch (bp->b_type) { + case BT_CYLGRP: + if ((sblock.fs_metackhash & CK_CYLGRP) == 0) + break; + bp->b_un.b_cg->cg_ckhash = 0; + bp->b_un.b_cg->cg_ckhash = + calculate_crc32c(~0L, bp->b_un.b_buf, bp->b_size); + break; + default: + break; + } bp->b_dirty = 0; if (fswritefd < 0) { pfatal("WRITING IN READ_ONLY MODE.\n"); diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index ef289c4342ac..16c46bece00b 100644 --- a/sbin/fsck_ffs/pass5.c +++ b/sbin/fsck_ffs/pass5.c @@ -71,6 +71,13 @@ pass5(void) inoinfo(UFS_WINO)->ino_state = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; + if (preen == 0 && yflag == 0 && fs->fs_magic == FS_UFS2_MAGIC && + fswritefd != -1 && (fs->fs_metackhash & CK_CYLGRP) == 0 && + reply("ADD CYLINDER GROUP CHECKSUM PROTECTION") != 0) { + fs->fs_metackhash |= CK_CYLGRP; + rewritecg = 1; + sbdirty(); + } if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) @@ -305,6 +312,12 @@ pass5(void) 
sump[run]++; } } + if ((fs->fs_metackhash & CK_CYLGRP) != 0) { + newcg->cg_ckhash = 0; + newcg->cg_ckhash = + calculate_crc32c(~0L, (void *)newcg, fs->fs_cgsize); + } + if (bkgrdflag != 0) { cstotal.cs_nffree += cg->cg_cs.cs_nffree; cstotal.cs_nbfree += cg->cg_cs.cs_nbfree; diff --git a/sbin/fsdb/Makefile b/sbin/fsdb/Makefile index a3e7545c7c5a..96b93cd1690b 100644 --- a/sbin/fsdb/Makefile +++ b/sbin/fsdb/Makefile @@ -10,7 +10,7 @@ SRCS= fsdb.c fsdbutil.c \ pass5.c setup.c utilities.c ffs_subr.c ffs_tables.c globs.c CFLAGS+= -I${.CURDIR:H}/fsck_ffs WARNS?= 2 -LIBADD= edit +LIBADD= edit ufs .PATH: ${.CURDIR:H}/fsck_ffs ${SRCTOP}/sys/ufs/ffs .include diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c index dbfab4536bd6..f68c42ec6b36 100644 --- a/sbin/newfs/mkfs.c +++ b/sbin/newfs/mkfs.c @@ -98,6 +98,7 @@ static void iput(union dinode *, ino_t); static int makedir(struct direct *, int); static void setblock(struct fs *, unsigned char *, int); static void wtfs(ufs2_daddr_t, int, char *); +static void cgckhash(struct cg *); static u_int32_t newfs_random(void); static int @@ -491,6 +492,11 @@ restart: sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree; sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree; } + /* + * Set flags for metadata that is being check-hashed. + */ + if (Oflag > 1) + sblock.fs_metackhash = CK_CYLGRP; /* * Dump out summary information about file system. @@ -791,6 +797,7 @@ initcg(int cylno, time_t utime) } } *cs = acg.cg_cs; + cgckhash(&acg); /* * Write out the duplicate super block, the cylinder group map * and two blocks worth of inodes in a single write. @@ -1006,6 +1013,7 @@ goth: setbit(cg_blksfree(&acg), d + i); } /* XXX cgwrite(&disk, 0)??? 
*/ + cgckhash(&acg); wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize, (char *)&acg); return ((ufs2_daddr_t)d); @@ -1027,6 +1035,7 @@ iput(union dinode *ip, ino_t ino) } acg.cg_cs.cs_nifree--; setbit(cg_inosused(&acg), ino); + cgckhash(&acg); wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize, (char *)&acg); sblock.fs_cstotal.cs_nifree--; @@ -1059,6 +1068,20 @@ wtfs(ufs2_daddr_t bno, int size, char *bf) err(36, "wtfs: %d bytes at sector %jd", size, (intmax_t)bno); } +/* + * Calculate the check-hash of the cylinder group. + */ +static void +cgckhash(cgp) + struct cg *cgp; +{ + + if ((sblock.fs_metackhash & CK_CYLGRP) == 0) + return; + cgp->cg_ckhash = 0; + cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); +} + /* * check if a block is available */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 77490be4d1f1..dd1f9bbd235a 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -119,6 +119,8 @@ static void vfs_vmio_truncate(struct buf *bp, int npages); static void vfs_vmio_extend(struct buf *bp, int npages, int size); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); +static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int, + void (*)(struct buf *)); static int buf_flush(struct vnode *vp, int); static int buf_recycle(bool); static int buf_scan(bool); @@ -1783,15 +1785,14 @@ bufkva_reclaim(vmem_t *vmem, int flags) return; } - /* * Attempt to initiate asynchronous I/O on read-ahead blocks. We must * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set, * the buffer is valid and we do not have to do anything. 
*/ -void -breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, - int cnt, struct ucred * cred) +static void +breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, int cnt, + struct ucred * cred, int flags, void (*ckhashfunc)(struct buf *)) { struct buf *rabp; int i; @@ -1800,31 +1801,34 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0, 0); - - if ((rabp->b_flags & B_CACHE) == 0) { - if (!TD_IS_IDLETHREAD(curthread)) { -#ifdef RACCT - if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, rabp, 0); - PROC_UNLOCK(curproc); - } -#endif /* RACCT */ - curthread->td_ru.ru_inblock++; - } - rabp->b_flags |= B_ASYNC; - rabp->b_flags &= ~B_INVAL; - rabp->b_ioflags &= ~BIO_ERROR; - rabp->b_iocmd = BIO_READ; - if (rabp->b_rcred == NOCRED && cred != NOCRED) - rabp->b_rcred = crhold(cred); - vfs_busy_pages(rabp, 0); - BUF_KERNPROC(rabp); - rabp->b_iooffset = dbtob(rabp->b_blkno); - bstrategy(rabp); - } else { + if ((rabp->b_flags & B_CACHE) != 0) { brelse(rabp); + continue; } + if (!TD_IS_IDLETHREAD(curthread)) { +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, rabp, 0); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ + curthread->td_ru.ru_inblock++; + } + rabp->b_flags |= B_ASYNC; + rabp->b_flags &= ~B_INVAL; + if ((flags & GB_CKHASH) != 0) { + rabp->b_flags |= B_CKHASH; + rabp->b_ckhashcalc = ckhashfunc; + } + rabp->b_ioflags &= ~BIO_ERROR; + rabp->b_iocmd = BIO_READ; + if (rabp->b_rcred == NOCRED && cred != NOCRED) + rabp->b_rcred = crhold(cred); + vfs_busy_pages(rabp, 0); + BUF_KERNPROC(rabp); + rabp->b_iooffset = dbtob(rabp->b_blkno); + bstrategy(rabp); } } @@ -1840,10 +1844,11 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, */ int breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, - int *rabsize, int cnt, struct ucred *cred, int flags, struct buf **bpp) + int *rabsize, int cnt, struct 
ucred *cred, int flags, + void (*ckhashfunc)(struct buf *), struct buf **bpp) { struct buf *bp; - int rv = 0, readwait = 0; + int readwait, rv; CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size); /* @@ -1853,7 +1858,10 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, if (bp == NULL) return (EBUSY); - /* if not found in cache, do some I/O */ + /* + * If not found in cache, do some I/O + */ + readwait = 0; if ((bp->b_flags & B_CACHE) == 0) { if (!TD_IS_IDLETHREAD(curthread)) { #ifdef RACCT @@ -1867,6 +1875,10 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, } bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; + if ((flags & GB_CKHASH) != 0) { + bp->b_flags |= B_CKHASH; + bp->b_ckhashcalc = ckhashfunc; + } bp->b_ioflags &= ~BIO_ERROR; if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); @@ -1876,8 +1888,12 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, ++readwait; } - breada(vp, rablkno, rabsize, cnt, cred); + /* + * Attempt to initiate asynchronous I/O on read-ahead blocks. 
+ */ + breada(vp, rablkno, rabsize, cnt, cred, flags, ckhashfunc); + rv = 0; if (readwait) { rv = bufwait(bp); if (rv != 0) { @@ -4048,6 +4064,10 @@ bufdone(struct buf *bp) runningbufwakeup(bp); if (bp->b_iocmd == BIO_WRITE) dropobj = bp->b_bufobj; + else if ((bp->b_flags & B_CKHASH) != 0) { + KASSERT(buf_mapped(bp), ("biodone: bp %p not mapped", bp)); + (*bp->b_ckhashcalc)(bp); + } /* call optional completion function if requested */ if (bp->b_iodone != NULL) { biodone = bp->b_iodone; diff --git a/sys/libkern/crc32.c b/sys/libkern/crc32.c index 7eaacd454661..149de3de6dee 100644 --- a/sys/libkern/crc32.c +++ b/sys/libkern/crc32.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include + +#ifdef _KERNEL #include #include @@ -57,6 +59,7 @@ __FBSDID("$FreeBSD$"); #if defined(__aarch64__) #include #endif +#endif /* _KERNEL */ const uint32_t crc32_tab[] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, @@ -759,6 +762,7 @@ calculate_crc32c(uint32_t crc32c, const unsigned char *buffer, unsigned int length) { +#ifdef _KERNEL #if defined(__amd64__) || defined(__i386__) if ((cpu_feature2 & CPUID2_SSE42) != 0) { return (sse42_crc32c(crc32c, buffer, length)); @@ -776,6 +780,7 @@ calculate_crc32c(uint32_t crc32c, return (armv8_crc32c(crc32c, buffer, length)); } else #endif +#endif /* _KERNEL */ if (length < 4) { return (singletable_crc32c(crc32c, buffer, length)); } else { diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 58bd91e34ff9..5d4f0dc6784e 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -70,7 +70,7 @@ extern struct bio_ops { struct vm_object; struct vm_page; -typedef unsigned char b_xflags_t; +typedef uint32_t b_xflags_t; /* * The buffer header describes an I/O operation in the kernel. @@ -104,6 +104,8 @@ struct buf { off_t b_iooffset; long b_resid; void (*b_iodone)(struct buf *); + void (*b_ckhashcalc)(struct buf *); + uint64_t b_ckhash; /* B_CKHASH requested check-hash */ daddr_t b_blkno; /* Underlying physical block number. 
*/ off_t b_offset; /* Offset into file. */ TAILQ_ENTRY(buf) b_bobufs; /* (V) Buffer's associated vnode. */ @@ -209,7 +211,7 @@ struct buf { #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_VALIDSUSPWRT 0x00000040 /* Valid write during suspension. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ -#define B_00000100 0x00000100 /* Available flag. */ +#define B_CKHASH 0x00000100 /* checksum hash calculated on read */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_NOREUSE 0x00000800 /* Contents not reused once released. */ @@ -242,12 +244,17 @@ struct buf { /* * These flags are kept in b_xflags. + * + * BX_FSPRIV reserves a set of eight flags that may be used by individual + * filesystems for their own purpose. Their specific definitions are + * found in the header files for each filesystem that uses them. */ #define BX_VNDIRTY 0x00000001 /* On vnode dirty list */ #define BX_VNCLEAN 0x00000002 /* On vnode clean list */ #define BX_BKGRDWRITE 0x00000010 /* Do writes in background */ #define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */ #define BX_ALTDATA 0x00000040 /* Holds extended data */ +#define BX_FSPRIV 0x00FF0000 /* filesystem-specific flags mask */ #define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty" @@ -467,6 +474,7 @@ buf_track(struct buf *bp, const char *location) #define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon. */ #define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */ #define GB_KVAALLOC 0x0010 /* But allocate KVA. */ +#define GB_CKHASH 0x0020 /* If reading, calc checksum hash */ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ @@ -504,15 +512,15 @@ int buf_dirty_count_severe(void); void bremfree(struct buf *); void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. 
*/ #define bread(vp, blkno, size, cred, bpp) \ - breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, bpp) + breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, NULL, bpp) #define bread_gb(vp, blkno, size, cred, gbflags, bpp) \ breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, \ - gbflags, bpp) + gbflags, NULL, bpp) #define breadn(vp, blkno, size, rablkno, rabsize, cnt, cred, bpp) \ - breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, 0, bpp) + breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, \ + 0, NULL, bpp) int breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int, - struct ucred *, int, struct buf **); -void breada(struct vnode *, daddr_t *, int *, int, struct ucred *); + struct ucred *, int, void (*)(struct buf *), struct buf **); void bdwrite(struct buf *); void bawrite(struct buf *); void babarrierwrite(struct buf *); diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index f1dbfab28fd0..3f40fca07177 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -123,6 +123,7 @@ static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int, static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); +static void ffs_ckhash_cg(struct buf *); /* * Allocate a block in the filesystem. @@ -2596,27 +2597,55 @@ ffs_getcg(fs, devvp, cg, bpp, cgpp) { struct buf *bp; struct cg *cgp; - int error; + int flags, error; *bpp = NULL; *cgpp = NULL; - error = bread(devvp, devvp->v_type == VREG ? + flags = 0; + if ((fs->fs_metackhash & CK_CYLGRP) != 0) + flags |= GB_CKHASH; + error = breadn_flags(devvp, devvp->v_type == VREG ? 
fragstoblks(fs, cgtod(fs, cg)) : fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + (int)fs->fs_cgsize, NULL, NULL, 0, NOCRED, flags, + ffs_ckhash_cg, &bp); if (error != 0) return (error); cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp) || cgp->cg_cgx != cg) { + if (((fs->fs_metackhash & CK_CYLGRP) != 0 && + (bp->b_flags & B_CKHASH) != 0 && + cgp->cg_ckhash != bp->b_ckhash) || + !cg_chkmagic(cgp) || cgp->cg_cgx != cg) { + printf("checksum failed: cg %u, cgp: 0x%x != bp: 0x%lx\n", + cg, cgp->cg_ckhash, bp->b_ckhash); + bp->b_flags &= ~B_CKHASH; + bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (EIO); } + bp->b_flags &= ~B_CKHASH; bp->b_xflags |= BX_BKGRDWRITE; + if ((fs->fs_metackhash & CK_CYLGRP) != 0) + bp->b_xflags |= BX_CYLGRP; cgp->cg_old_time = cgp->cg_time = time_second; *bpp = bp; *cgpp = cgp; return (0); } +static void +ffs_ckhash_cg(bp) + struct buf *bp; +{ + uint32_t ckhash; + struct cg *cgp; + + cgp = (struct cg *)bp->b_data; + ckhash = cgp->cg_ckhash; + cgp->cg_ckhash = 0; + bp->b_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); + cgp->cg_ckhash = ckhash; +} + /* * Fserr prints the name of a filesystem with an error diagnostic. 
* diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 5a2099d4bf28..8d74b4e3ac3d 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -110,9 +110,9 @@ ffs_update(vp, waitfor) if (IS_SNAPSHOT(ip)) flags = GB_LOCK_NOWAIT; loop: - error = breadn_flags(ITODEVVP(ip), + error = bread_gb(ITODEVVP(ip), fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp); + (int) fs->fs_bsize, NOCRED, flags, &bp); if (error != 0) { if (error != EBUSY) return (error); diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index abfe29ba9b99..8ddcb9cbd232 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -927,7 +927,7 @@ cgaccount(cg, vp, nbp, passno) error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); if (error) { - return (error); + goto out; } indiroff = (base + loc - UFS_NDADDR) % NINDIR(fs); for ( ; loc < len; loc++, indiroff++) { @@ -939,7 +939,7 @@ cgaccount(cg, vp, nbp, passno) lblktosize(fs, (off_t)(base + loc)), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); if (error) { - return (error); + goto out; } indiroff = 0; } @@ -967,7 +967,21 @@ cgaccount(cg, vp, nbp, passno) if (passno == 2) ibp->b_flags |= B_VALIDSUSPWRT; bdwrite(ibp); - return (0); +out: + /* + * We have to calculate the crc32c here rather than just setting the + * BX_CYLGRP b_xflags because the allocation of the block for the + * cylinder group map will always be a full size block (fs_bsize) + * even though the cylinder group may be smaller (fs_cgsize). The + * crc32c must be computed only over fs_cgsize whereas the BX_CYLGRP + * flag causes it to be computed over the size of the buffer. 
+ */ + if ((fs->fs_metackhash & CK_CYLGRP) != 0) { + ((struct cg *)nbp->b_data)->cg_ckhash = 0; + ((struct cg *)nbp->b_data)->cg_ckhash = + calculate_crc32c(~0L, nbp->b_data, fs->fs_cgsize); + } + return (error); } /* diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index d73d0ca0c466..b9a1ec412448 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -838,7 +838,11 @@ ffs_mountfs(devvp, mp, td) goto out; } fs->fs_fmod = 0; - fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indices */ + /* none of these types of check-hashes are maintained */ + fs->fs_metackhash &= ~(CK_SUPERBLOCK | CK_INODE | CK_INDIR | CK_DIR); + /* no support for directory indices or any other undefined flags */ + fs->fs_flags &= ~FS_INDEXDIRS; + fs->fs_flags &= FS_SUPPORTED; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; @@ -2150,7 +2154,8 @@ ffs_bufwrite(struct buf *bp) BO_LOCK(bp->b_bufobj); bp->b_vflags |= BV_BKGRDINPROG; BO_UNLOCK(bp->b_bufobj); - newbp->b_xflags |= BX_BKGRDMARKER; + newbp->b_xflags |= + (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER; newbp->b_lblkno = bp->b_lblkno; newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; @@ -2194,9 +2199,8 @@ static void ffs_geom_strategy(struct bufobj *bo, struct buf *bp) { struct vnode *vp; - int error; struct buf *tbp; - int nocopy; + int error, nocopy; vp = bo2vnode(bo); if (bp->b_iocmd == BIO_WRITE) { @@ -2247,6 +2251,32 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp) } #endif + /* + * Check for metadata that needs check-hashes and update them. 
+ */ + switch (bp->b_xflags & BX_FSPRIV) { + case BX_CYLGRP: + ((struct cg *)bp->b_data)->cg_ckhash = 0; + ((struct cg *)bp->b_data)->cg_ckhash = + calculate_crc32c(~0L, bp->b_data, bp->b_bcount); + break; + + case BX_SUPERBLOCK: + case BX_INODE: + case BX_INDIR: + case BX_DIR: + printf("Check-hash write is unimplemented!!!\n"); + break; + + case 0: + break; + + default: + printf("multiple buffer types 0x%b\n", + (u_int)(bp->b_xflags & BX_FSPRIV), + PRINT_UFS_BUF_XFLAGS); + break; + } } g_vfs_strategy(bo, bp); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index f8a186d7eed4..5ee9ff5fc38d 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -587,7 +587,7 @@ ffs_read(ap) */ u_int nextsize = blksize(fs, ip, nextlbn); error = breadn_flags(vp, lbn, size, &nextlbn, - &nextsize, 1, NOCRED, GB_UNMAPPED, &bp); + &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp); } else { /* * Failing all of the above, just read what the diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index 81fbeda84c1e..8315b3f359f3 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -361,7 +361,8 @@ struct fs { int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ ufs_time_t fs_mtime; /* Last mount or fsck time. 
*/ int32_t fs_sujfree; /* SUJ free list */ - int32_t fs_sparecon32[23]; /* reserved for future constants */ + int32_t fs_sparecon32[22]; /* reserved for future constants */ + u_int32_t fs_metackhash; /* metadata check-hash, see CK_ below */ int32_t fs_flags; /* see FS_ flags below */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ @@ -388,7 +389,6 @@ CTASSERT(sizeof(struct fs) == 1376); #define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ #define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ #define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */ -#define FS_OKAY 0x7c269d38 /* superblock checksum */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ @@ -415,7 +415,11 @@ CTASSERT(sizeof(struct fs) == 1376); * on-disk auxiliary indexes (such as B-trees) for speeding directory * accesses. Kernels that do not support auxiliary indices clear the * flag to indicate that the indices need to be rebuilt (by fsck) before - * they can be used. + * they can be used. When a filesystem is mounted, any flags not + * included in FS_SUPPORTED are cleared. This lets newer features + * know that the filesystem has been run on an older version of the + * filesystem code and thus that data structures associated with those + * features are out-of-date and need to be rebuilt. 
* * FS_ACLS indicates that POSIX.1e ACLs are administratively enabled * for the file system, so they should be loaded from extended attributes, @@ -437,6 +441,28 @@ CTASSERT(sizeof(struct fs) == 1376); #define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */ #define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */ #define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */ +#define FS_SUPPORTED 0xFFFF /* supported flags, others cleared at mount */ + +/* + * The fs_metackhash field indicates the types of metadata check-hash + * that are maintained for a filesystem. Not all filesystems check-hash + * all metadata. + */ +#define CK_SUPERBLOCK 0x0001 /* the superblock */ +#define CK_CYLGRP 0x0002 /* the cylinder groups */ +#define CK_INODE 0x0004 /* inodes */ +#define CK_INDIR 0x0008 /* indirect blocks */ +#define CK_DIR 0x0010 /* directory contents */ +/* + * The BX_FSPRIV buffer b_xflags are used to track types of data in buffers. + */ +#define BX_SUPERBLOCK 0x00010000 /* superblock */ +#define BX_CYLGRP 0x00020000 /* cylinder groups */ +#define BX_INODE 0x00040000 /* inodes */ +#define BX_INDIR 0x00080000 /* indirect blocks */ +#define BX_DIR 0x00100000 /* directory contents */ + +#define PRINT_UFS_BUF_XFLAGS "\20\25dir\24indir\23inode\22cylgrp\21superblock" /* * Macros to access bits in the fs_active array. @@ -506,7 +532,8 @@ struct cg { u_int32_t cg_niblk; /* number of inode blocks this cg */ u_int32_t cg_initediblk; /* last initialized inode */ u_int32_t cg_unrefs; /* number of unreferenced inodes */ - int32_t cg_sparecon32[2]; /* reserved for future use */ + int32_t cg_sparecon32[1]; /* reserved for future use */ + u_int32_t cg_ckhash; /* check-hash of this cg */ ufs_time_t cg_time; /* time last written */ int64_t cg_sparecon64[3]; /* reserved for future use */ u_int8_t cg_space[1]; /* space for cylinder group maps */