From b1897c197c06ebd09ab26a462489bd331c96ce2e Mon Sep 17 00:00:00 2001 From: Julian Elischer Date: Sun, 8 Mar 1998 09:59:44 +0000 Subject: [PATCH] Reviewed by: dyson@freebsd.org (john Dyson), dg@root.com (david greenman) Submitted by: Kirk McKusick (mcKusick@mckusick.com) Obtained from: WHistle development tree --- sbin/fsck/dir.c | 9 +- sbin/fsck/fsck.h | 2 + sbin/fsck/inode.c | 11 + sbin/fsck/main.c | 22 +- sbin/fsck/pass1.c | 16 +- sbin/fsck/pass2.c | 31 ++- sbin/fsck/pass5.c | 60 +++-- sbin/fsck/setup.c | 8 +- sbin/fsck/utilities.c | 41 +++- sbin/fsck_ffs/dir.c | 9 +- sbin/fsck_ffs/fsck.h | 2 + sbin/fsck_ffs/inode.c | 11 + sbin/fsck_ffs/main.c | 22 +- sbin/fsck_ffs/pass1.c | 16 +- sbin/fsck_ffs/pass2.c | 31 ++- sbin/fsck_ffs/pass5.c | 60 +++-- sbin/fsck_ffs/setup.c | 8 +- sbin/fsck_ffs/utilities.c | 41 +++- sbin/fsck_ifs/dir.c | 9 +- sbin/fsck_ifs/fsck.h | 2 + sbin/fsck_ifs/inode.c | 11 + sbin/fsck_ifs/main.c | 22 +- sbin/fsck_ifs/pass1.c | 16 +- sbin/fsck_ifs/pass2.c | 31 ++- sbin/fsck_ifs/pass5.c | 60 +++-- sbin/fsck_ifs/setup.c | 8 +- sbin/fsck_ifs/utilities.c | 41 +++- sbin/mount/mount.c | 8 +- sbin/mount_ifs/mount.c | 8 +- sbin/tunefs/tunefs.8 | 1 + sbin/tunefs/tunefs.c | 23 +- sys/conf/NOTES | 9 +- sys/conf/files | 2 + sys/conf/options | 8 +- sys/dev/de/if_de.c | 4 +- sys/fs/cd9660/cd9660_vfsops.c | 11 +- sys/fs/msdosfs/msdosfs_vfsops.c | 19 +- sys/fs/specfs/spec_vnops.c | 10 +- sys/gnu/ext2fs/inode.h | 11 +- sys/gnu/fs/ext2fs/inode.h | 11 +- sys/i386/conf/LINT | 9 +- sys/i386/conf/NOTES | 9 +- sys/isofs/cd9660/cd9660_vfsops.c | 11 +- sys/kern/kern_malloc.c | 6 +- sys/kern/kern_shutdown.c | 14 +- sys/kern/kern_synch.c | 4 +- sys/kern/vfs_bio.c | 78 ++++++- sys/kern/vfs_cluster.c | 11 +- sys/kern/vfs_export.c | 370 ++++++++++++++++++++++++++++++- sys/kern/vfs_extattr.c | 29 ++- sys/kern/vfs_subr.c | 370 ++++++++++++++++++++++++++++++- sys/kern/vfs_syscalls.c | 29 ++- sys/kern/vnode_if.src | 14 +- sys/miscfs/specfs/spec_vnops.c | 10 +- 
sys/miscfs/specfs/specdev.h | 11 +- sys/msdosfs/msdosfs_vfsops.c | 19 +- sys/nfs/nfs_bio.c | 6 +- sys/nfs/nfs_vnops.c | 9 +- sys/nfsclient/nfs_bio.c | 6 +- sys/nfsclient/nfs_vnops.c | 9 +- sys/pci/if_de.c | 4 +- sys/sys/bio.h | 22 +- sys/sys/buf.h | 22 +- sys/sys/malloc.h | 8 +- sys/sys/mount.h | 15 +- sys/sys/vnode.h | 12 +- sys/ufs/ffs/ffs_alloc.c | 93 ++++++-- sys/ufs/ffs/ffs_balloc.c | 92 +++++--- sys/ufs/ffs/ffs_extern.h | 34 ++- sys/ufs/ffs/ffs_inode.c | 56 ++++- sys/ufs/ffs/ffs_subr.c | 26 ++- sys/ufs/ffs/ffs_vfsops.c | 85 +++++-- sys/ufs/ffs/ffs_vnops.c | 97 +++++--- sys/ufs/ffs/fs.h | 16 +- sys/ufs/ufs/inode.h | 11 +- sys/ufs/ufs/ufs_extern.h | 27 ++- sys/ufs/ufs/ufs_lookup.c | 265 ++++++++++++++-------- sys/ufs/ufs/ufs_quota.c | 8 +- sys/ufs/ufs/ufs_readwrite.c | 10 +- sys/ufs/ufs/ufs_vnops.c | 340 ++++++++++++++-------------- 80 files changed, 2349 insertions(+), 643 deletions(-) diff --git a/sbin/fsck/dir.c b/sbin/fsck/dir.c index 4b6999b013a9..6ab67d33ca32 100644 --- a/sbin/fsck/dir.c +++ b/sbin/fsck/dir.c @@ -315,12 +315,13 @@ adjust(idesc, lcnt) pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; @@ -406,13 +407,15 @@ linkup(orphan, parentdir) lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? 
"DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; diff --git a/sbin/fsck/fsck.h b/sbin/fsck/fsck.h index 1967691e989c..4e0271d815dd 100644 --- a/sbin/fsck/fsck.h +++ b/sbin/fsck/fsck.h @@ -176,6 +176,8 @@ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ diff --git a/sbin/fsck/inode.c b/sbin/fsck/inode.c index 429dd3b2267b..74561c8eb1b4 100644 --- a/sbin/fsck/inode.c +++ b/sbin/fsck/inode.c @@ -559,6 +559,8 @@ allocino(request, type) { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; @@ -569,9 +571,16 @@ allocino(request, type) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: @@ -580,12 +589,14 @@ allocino(request, type) default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = 
dp->di_atime; diff --git a/sbin/fsck/main.c b/sbin/fsck/main.c index dcb7006125cb..b4bc2c9caaaf 100644 --- a/sbin/fsck/main.c +++ b/sbin/fsck/main.c @@ -42,7 +42,7 @@ static const char copyright[] = static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include @@ -209,6 +209,11 @@ checkfilesys(filesys, mntpt, auxdata, child) return (0); } + /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. + */ + resolved = 1; /* * 1: scan inodes tallying blocks used */ @@ -224,7 +229,7 @@ checkfilesys(filesys, mntpt, auxdata, child) * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); @@ -306,19 +311,20 @@ checkfilesys(filesys, mntpt, auxdata, child) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. 
*/ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); diff --git a/sbin/fsck/pass1.c b/sbin/fsck/pass1.c index 99582777186c..181f858184bc 100644 --- a/sbin/fsck/pass1.c +++ b/sbin/fsck/pass1.c @@ -200,8 +200,10 @@ checkinode(inumber, idesc) zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; @@ -270,8 +272,10 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } @@ -288,15 +292,19 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new->dup = blkno; diff --git a/sbin/fsck/pass2.c b/sbin/fsck/pass2.c index 445f6f1682b9..ebc33b8a650f 100644 --- a/sbin/fsck/pass2.c +++ b/sbin/fsck/pass2.c @@ -66,8 +66,10 @@ pass2() case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; @@ -80,8 +82,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: @@ -93,8 +97,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); 
break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; @@ -139,8 +145,14 @@ pass2() } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); @@ -394,7 +406,7 @@ pass2check(idesc) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; @@ -423,8 +435,11 @@ pass2check(idesc) pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } diff --git a/sbin/fsck/pass5.c b/sbin/fsck/pass5.c index 3dd0c1aac237..873f008b8c78 100644 --- a/sbin/fsck/pass5.c +++ b/sbin/fsck/pass5.c @@ -50,11 +50,12 @@ void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; @@ -112,6 +113,8 @@ pass5() sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; @@ -126,12 +129,12 @@ pass5() fs->fs_cpg * 
fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); @@ -148,7 +151,7 @@ pass5() break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } @@ -299,13 +302,6 @@ pass5() cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], &cg_blktot(cg)[0], sumsize) != 0) && @@ -315,6 +311,40 @@ pass5() &cg_blktot(newcg)[0], (size_t)sumsize); cgdirty(); } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED 
FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); + cgdirty(); + } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; diff --git a/sbin/fsck/setup.c b/sbin/fsck/setup.c index 28e7e4b4a46e..f464b63e53ba 100644 --- a/sbin/fsck/setup.c +++ b/sbin/fsck/setup.c @@ -255,8 +255,10 @@ setup(dev) fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } @@ -311,6 +313,10 @@ setup(dev) goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: diff --git a/sbin/fsck/utilities.c b/sbin/fsck/utilities.c index 30c31cfeedb1..465fb3be3884 100644 --- a/sbin/fsck/utilities.c +++ b/sbin/fsck/utilities.c @@ -87,6 +87,7 @@ reply(question) printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { @@ -97,13 +98,17 @@ reply(question) printf("%s? 
[yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } @@ -360,7 +365,8 @@ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); @@ -375,9 +381,21 @@ allocblk(frags) j += k; continue; } - for (k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } @@ -545,7 +563,8 @@ dofix(idesc, msg) /* * An unexpected inconsistency occured. - * Die if preening, otherwise just print message and continue. + * Die if preening or filesystem is running with soft dependency protocol, + * otherwise just print message and continue. */ void #if __STDC__ @@ -565,19 +584,23 @@ pfatal(fmt, va_alist) if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. 
+ * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c index 4b6999b013a9..6ab67d33ca32 100644 --- a/sbin/fsck_ffs/dir.c +++ b/sbin/fsck_ffs/dir.c @@ -315,12 +315,13 @@ adjust(idesc, lcnt) pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; @@ -406,13 +407,15 @@ linkup(orphan, parentdir) lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h index 1967691e989c..4e0271d815dd 100644 --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -176,6 +176,8 @@ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index 429dd3b2267b..74561c8eb1b4 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -559,6 +559,8 @@ allocino(request, type) { register 
ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; @@ -569,9 +571,16 @@ allocino(request, type) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: @@ -580,12 +589,14 @@ allocino(request, type) default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c index dcb7006125cb..b4bc2c9caaaf 100644 --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -42,7 +42,7 @@ static const char copyright[] = static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include @@ -209,6 +209,11 @@ checkfilesys(filesys, mntpt, auxdata, child) return (0); } + /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
+ */ + resolved = 1; /* * 1: scan inodes tallying blocks used */ @@ -224,7 +229,7 @@ checkfilesys(filesys, mntpt, auxdata, child) * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); @@ -306,19 +311,20 @@ checkfilesys(filesys, mntpt, auxdata, child) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); diff --git a/sbin/fsck_ffs/pass1.c b/sbin/fsck_ffs/pass1.c index 99582777186c..181f858184bc 100644 --- a/sbin/fsck_ffs/pass1.c +++ b/sbin/fsck_ffs/pass1.c @@ -200,8 +200,10 @@ checkinode(inumber, idesc) zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; @@ -270,8 +272,10 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } @@ -288,15 +292,19 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } 
new->dup = blkno; diff --git a/sbin/fsck_ffs/pass2.c b/sbin/fsck_ffs/pass2.c index 445f6f1682b9..ebc33b8a650f 100644 --- a/sbin/fsck_ffs/pass2.c +++ b/sbin/fsck_ffs/pass2.c @@ -66,8 +66,10 @@ pass2() case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; @@ -80,8 +82,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: @@ -93,8 +97,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; @@ -139,8 +145,14 @@ pass2() } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); @@ -394,7 +406,7 @@ pass2check(idesc) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; @@ -423,8 +435,11 @@ pass2check(idesc) pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index 3dd0c1aac237..873f008b8c78 100644 --- a/sbin/fsck_ffs/pass5.c +++ 
b/sbin/fsck_ffs/pass5.c @@ -50,11 +50,12 @@ void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; @@ -112,6 +113,8 @@ pass5() sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; @@ -126,12 +129,12 @@ pass5() fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); @@ -148,7 +151,7 @@ pass5() break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } @@ -299,13 +302,6 @@ pass5() cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && @@ -315,6 +311,40 @@ pass5() &cg_blktot(newcg)[0], (size_t)sumsize); cgdirty(); } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); + cgdirty(); + } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c index 28e7e4b4a46e..f464b63e53ba 100644 --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -255,8 +255,10 @@ setup(dev) fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } @@ -311,6 +313,10 @@ setup(dev) goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: diff --git a/sbin/fsck_ffs/utilities.c b/sbin/fsck_ffs/utilities.c index 30c31cfeedb1..465fb3be3884 100644 --- a/sbin/fsck_ffs/utilities.c +++ b/sbin/fsck_ffs/utilities.c @@ -87,6 +87,7 @@ reply(question) printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? 
no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { @@ -97,13 +98,17 @@ reply(question) printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } @@ -360,7 +365,8 @@ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); @@ -375,9 +381,21 @@ allocblk(frags) j += k; continue; } - for (k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } @@ -545,7 +563,8 @@ dofix(idesc, msg) /* * An unexpected inconsistency occured. - * Die if preening, otherwise just print message and continue. + * Die if preening or filesystem is running with soft dependency protocol, + * otherwise just print message and continue. */ void #if __STDC__ @@ -565,19 +584,23 @@ pfatal(fmt, va_alist) if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? 
" SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ diff --git a/sbin/fsck_ifs/dir.c b/sbin/fsck_ifs/dir.c index 4b6999b013a9..6ab67d33ca32 100644 --- a/sbin/fsck_ifs/dir.c +++ b/sbin/fsck_ifs/dir.c @@ -315,12 +315,13 @@ adjust(idesc, lcnt) pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; @@ -406,13 +407,15 @@ linkup(orphan, parentdir) lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; diff --git a/sbin/fsck_ifs/fsck.h b/sbin/fsck_ifs/fsck.h index 1967691e989c..4e0271d815dd 100644 --- a/sbin/fsck_ifs/fsck.h +++ b/sbin/fsck_ifs/fsck.h @@ -176,6 +176,8 @@ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ diff --git a/sbin/fsck_ifs/inode.c 
b/sbin/fsck_ifs/inode.c index 429dd3b2267b..74561c8eb1b4 100644 --- a/sbin/fsck_ifs/inode.c +++ b/sbin/fsck_ifs/inode.c @@ -559,6 +559,8 @@ allocino(request, type) { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; @@ -569,9 +571,16 @@ allocino(request, type) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: @@ -580,12 +589,14 @@ allocino(request, type) default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; diff --git a/sbin/fsck_ifs/main.c b/sbin/fsck_ifs/main.c index dcb7006125cb..b4bc2c9caaaf 100644 --- a/sbin/fsck_ifs/main.c +++ b/sbin/fsck_ifs/main.c @@ -42,7 +42,7 @@ static const char copyright[] = static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include @@ -209,6 +209,11 @@ checkfilesys(filesys, mntpt, auxdata, child) return (0); } + /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
+ */ + resolved = 1; /* * 1: scan inodes tallying blocks used */ @@ -224,7 +229,7 @@ checkfilesys(filesys, mntpt, auxdata, child) * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); @@ -306,19 +311,20 @@ checkfilesys(filesys, mntpt, auxdata, child) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); diff --git a/sbin/fsck_ifs/pass1.c b/sbin/fsck_ifs/pass1.c index 99582777186c..181f858184bc 100644 --- a/sbin/fsck_ifs/pass1.c +++ b/sbin/fsck_ifs/pass1.c @@ -200,8 +200,10 @@ checkinode(inumber, idesc) zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; @@ -270,8 +272,10 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } @@ -288,15 +292,19 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } 
new->dup = blkno; diff --git a/sbin/fsck_ifs/pass2.c b/sbin/fsck_ifs/pass2.c index 445f6f1682b9..ebc33b8a650f 100644 --- a/sbin/fsck_ifs/pass2.c +++ b/sbin/fsck_ifs/pass2.c @@ -66,8 +66,10 @@ pass2() case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; @@ -80,8 +82,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: @@ -93,8 +97,10 @@ pass2() errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; @@ -139,8 +145,14 @@ pass2() } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); @@ -394,7 +406,7 @@ pass2check(idesc) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; @@ -423,8 +435,11 @@ pass2check(idesc) pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } diff --git a/sbin/fsck_ifs/pass5.c b/sbin/fsck_ifs/pass5.c index 3dd0c1aac237..873f008b8c78 100644 --- a/sbin/fsck_ifs/pass5.c +++ 
b/sbin/fsck_ifs/pass5.c @@ -50,11 +50,12 @@ void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; @@ -112,6 +113,8 @@ pass5() sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; @@ -126,12 +129,12 @@ pass5() fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); @@ -148,7 +151,7 @@ pass5() break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } @@ -299,13 +302,6 @@ pass5() cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && @@ -315,6 +311,40 @@ pass5() &cg_blktot(newcg)[0], (size_t)sumsize); cgdirty(); } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); + cgdirty(); + } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; diff --git a/sbin/fsck_ifs/setup.c b/sbin/fsck_ifs/setup.c index 28e7e4b4a46e..f464b63e53ba 100644 --- a/sbin/fsck_ifs/setup.c +++ b/sbin/fsck_ifs/setup.c @@ -255,8 +255,10 @@ setup(dev) fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } @@ -311,6 +313,10 @@ setup(dev) goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: diff --git a/sbin/fsck_ifs/utilities.c b/sbin/fsck_ifs/utilities.c index 30c31cfeedb1..465fb3be3884 100644 --- a/sbin/fsck_ifs/utilities.c +++ b/sbin/fsck_ifs/utilities.c @@ -87,6 +87,7 @@ reply(question) printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? 
no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { @@ -97,13 +98,17 @@ reply(question) printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } @@ -360,7 +365,8 @@ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); @@ -375,9 +381,21 @@ allocblk(frags) j += k; continue; } - for (k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } @@ -545,7 +563,8 @@ dofix(idesc, msg) /* * An unexpected inconsistency occured. - * Die if preening, otherwise just print message and continue. + * Die if preening or filesystem is running with soft dependency protocol, + * otherwise just print message and continue. */ void #if __STDC__ @@ -565,19 +584,23 @@ pfatal(fmt, va_alist) if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? 
" SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ diff --git a/sbin/mount/mount.c b/sbin/mount/mount.c index 93b7e14edbdb..f4ee239b57df 100644 --- a/sbin/mount/mount.c +++ b/sbin/mount/mount.c @@ -42,7 +42,7 @@ static const char copyright[] = static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ @@ -98,6 +98,7 @@ static struct opt { { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; @@ -495,7 +496,8 @@ prmount(sfp) else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? 
" (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * @@ -602,6 +604,8 @@ putfsent(ent) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); diff --git a/sbin/mount_ifs/mount.c b/sbin/mount_ifs/mount.c index 93b7e14edbdb..f4ee239b57df 100644 --- a/sbin/mount_ifs/mount.c +++ b/sbin/mount_ifs/mount.c @@ -42,7 +42,7 @@ static const char copyright[] = static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ @@ -98,6 +98,7 @@ static struct opt { { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; @@ -495,7 +496,8 @@ prmount(sfp) else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? 
" (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * @@ -602,6 +604,8 @@ putfsent(ent) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); diff --git a/sbin/tunefs/tunefs.8 b/sbin/tunefs/tunefs.8 index df161a9de000..3d014475607d 100644 --- a/sbin/tunefs/tunefs.8 +++ b/sbin/tunefs/tunefs.8 @@ -46,6 +46,7 @@ .Op Fl m Ar minfree .Op Fl p .Bk -words +.Op Fl n Ar soft_dependency_enabling .Op Fl o Ar optimize_preference .Ek .Op Ar special | Ar filesys diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c index 1802f22f5ac3..b1df99b324c9 100644 --- a/sbin/tunefs/tunefs.c +++ b/sbin/tunefs/tunefs.c @@ -81,7 +81,7 @@ main(argc, argv) int argc; char *argv[]; { - char *cp, *special, *name; + char *cp, *special, *name, *action; struct stat st; int i; int Aflag = 0; @@ -182,6 +182,24 @@ main(argc, argv) warnx(OPTWARN, "space", "<", MINFREE); continue; + case 'n': + name = "soft updates"; + if (argc < 1) + errx(10, "-s: missing %s", name); + argc--, argv++; + if (strcmp(*argv, "enable") == 0) { + sblock.fs_flags |= FS_DOSOFTDEP; + action = "set"; + } else if (strcmp(*argv, "disable") == 0) { + sblock.fs_flags &= ~FS_DOSOFTDEP; + action = "cleared"; + } else { + errx(10, "bad %s (options are %s)", + name, "`enable' or `disable'"); + } + warnx("%s %s", name, action); + continue; + case 'o': name = "optimization preference"; if (argc < 1) @@ -237,6 +255,7 @@ usage() fprintf(stderr, "\t-d rotational delay between contiguous blocks\n"); fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n"); fprintf(stderr, "\t-m minimum percentage of free space\n"); + fprintf(stderr, "\t-n soft updates (`enable' or `disable')\n"); fprintf(stderr, "\t-o optimization preference (`space' or `time')\n"); fprintf(stderr, "\t-p no change - just prints current tuneable settings\n"); exit(2); 
@@ -261,6 +280,8 @@ getsb(fs, file) void printfs() { + warnx("soft updates: (-n) %s", + (sblock.fs_flags & FS_DOSOFTDEP)? "enabled" : "disabled"); warnx("maximum contiguous block count: (-a) %d", sblock.fs_maxcontig); warnx("rotational delay between contiguous blocks: (-d) %d ms", diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 357fdf410d59..cc44c5e04b74 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -466,6 +466,13 @@ options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) + # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. 
options MFS_ROOT=10 diff --git a/sys/conf/files b/sys/conf/files index 36d1b23b59c8..108d5c853390 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -416,6 +416,8 @@ ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_balloc.c optional mfs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_inode.c optional mfs +ufs/ffs/ffs_softdep_stub.c optional ffs +ufs/ffs/ffs_softdep.c optional softupdates ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_subr.c optional mfs ufs/ffs/ffs_tables.c optional ffs diff --git a/sys/conf/options b/sys/conf/options index 48a5c3355a01..161be3aec136 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -1,4 +1,4 @@ -# $Id: options,v 1.63 1998/02/27 10:02:37 itojun Exp $ +# $Id: options,v 1.64 1998/03/04 10:24:08 dufault Exp $ # Format: # Option name filename @@ -59,6 +59,12 @@ CD9660 FFS NFS +# If you are following the conditions in the copyright, +# you can enable soft-updates which will speed up a lot of thigs +# and make the system safer from crashes at the same time. +# otherwise a STUB module will be compiled in. +SOFTUPDATES opt_ffs.h + # The above static dependencies are planned removed, with a # _ROOT option to control if it usable as root. This list # allows these options to be present in config files already (though diff --git a/sys/dev/de/if_de.c b/sys/dev/de/if_de.c index 1cfaeba29de5..e764252d396b 100644 --- a/sys/dev/de/if_de.c +++ b/sys/dev/de/if_de.c @@ -1,5 +1,7 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index a9218c3a2b7d..1431b19f0819 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include @@ -392,7 +392,7 @@ iso_mountfs(devvp, mp, p, argp) isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { @@ -438,7 +438,7 @@ iso_mountfs(devvp, mp, p, argp) return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) @@ -489,7 +489,7 @@ cd9660_unmount(mp, mntflags, p) isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); @@ -561,7 +561,8 @@ cd9660_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! 
XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index 2b1d1d7f352f..0af5438dae63 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- @@ -772,7 +772,7 @@ mountmsdosfs(devvp, mp, p, argp) mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; @@ -818,7 +818,7 @@ msdosfs_unmount(mp, mntflags, p) if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; @@ -841,8 +841,9 @@ msdosfs_unmount(mp, mntflags, p) ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); @@ -946,9 +947,11 @@ msdosfs_sync(mp, waitfor, cred, p) simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? 
*/ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index 9c3c8450867d..666322f65609 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include @@ -548,8 +548,12 @@ spec_strategy(ap) struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } @@ -633,7 +637,9 @@ spec_close(ap) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h index f2fd0f25fa5e..4bd1cf5d7de1 100644 --- a/sys/gnu/ext2fs/inode.h +++ b/sys/gnu/ext2fs/inode.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ @@ -45,6 +45,11 @@ #include #include +/* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + /* * This must agree with the definition in . */ @@ -67,6 +72,7 @@ struct inode { u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. 
*/ struct fs *fs; /* FFS */ @@ -160,6 +166,9 @@ struct indir { } \ } +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) + /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h index f2fd0f25fa5e..4bd1cf5d7de1 100644 --- a/sys/gnu/fs/ext2fs/inode.h +++ b/sys/gnu/fs/ext2fs/inode.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ @@ -45,6 +45,11 @@ #include #include +/* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + /* * This must agree with the definition in . */ @@ -67,6 +72,7 @@ struct inode { u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ @@ -160,6 +166,9 @@ struct indir { } \ } +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) + /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index 357fdf410d59..cc44c5e04b74 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. 
Instead, you should start from GENERIC, and add options from @@ -466,6 +466,13 @@ options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) + # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index 357fdf410d59..cc44c5e04b74 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -466,6 +466,13 @@ options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) + # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. 
options MFS_ROOT=10 diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c index a9218c3a2b7d..1431b19f0819 100644 --- a/sys/isofs/cd9660/cd9660_vfsops.c +++ b/sys/isofs/cd9660/cd9660_vfsops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include @@ -392,7 +392,7 @@ iso_mountfs(devvp, mp, p, argp) isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { @@ -438,7 +438,7 @@ iso_mountfs(devvp, mp, p, argp) return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) @@ -489,7 +489,7 @@ cd9660_unmount(mp, mntflags, p) isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); @@ -561,7 +561,8 @@ cd9660_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index a51177c050b2..7d0746f860d5 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 - * $Id: kern_malloc.c,v 1.43 1998/02/09 06:09:22 eivind Exp $ + * $Id: kern_malloc.c,v 1.44 1998/02/23 07:41:23 dyson Exp $ */ #include "opt_vm.h" @@ -128,7 +128,7 @@ malloc(size, type, flags) indx = BUCKETINDX(size); kbp = &bucket[indx]; - s = splhigh(); + s = splmem(); while (ksp->ks_memuse >= ksp->ks_limit) { if (flags & M_NOWAIT) { splx(s); @@ -268,7 +268,7 @@ free(addr, type) kup = btokup(addr); size = 1 << kup->ku_indx; kbp = &bucket[kup->ku_indx]; - s = splhigh(); + s = splmem(); #ifdef DIAGNOSTIC /* * Check for returns of data that do not point to the diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index c83dd75df127..c6dd9c17bbda 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 - * $Id: kern_shutdown.c,v 1.27 1997/11/25 07:07:43 julian Exp $ + * $Id: kern_shutdown.c,v 1.28 1998/02/16 23:57:44 eivind Exp $ */ #include "opt_ddb.h" @@ -217,17 +217,27 @@ boot(howto) sync(&proc0, NULL); + /* + * With soft updates, some buffers that are + * written will be remarked as dirty until other + * buffers are written. + */ for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { nbusy++; + } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) + == B_DELWRI) { + /* bawrite(bp);*/ + nbusy++; } } if (nbusy == 0) break; printf("%d ", nbusy); - DELAY(40000 * iter); + sync(&proc0, NULL); + DELAY(50000 * iter); } if (nbusy) { /* diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index bb370ac4fa66..4fdc5bde1d0b 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 - * $Id: kern_synch.c,v 1.47 1998/02/25 06:04:46 bde Exp $ + * $Id: kern_synch.c,v 1.48 1998/03/04 10:25:55 dufault Exp $ */ #include "opt_ktrace.h" @@ -230,7 +230,6 @@ schedcpu(arg) register int s; register unsigned int newcpu; - wakeup((caddr_t)&lbolt); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time @@ -282,6 +281,7 @@ schedcpu(arg) splx(s); } vmmeter(); + wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 4c09e1dcb5a5..114e035a0d13 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -11,7 +11,7 @@ * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * - * $Id: vfs_bio.c,v 1.153 1998/03/04 03:17:30 dyson Exp $ + * $Id: vfs_bio.c,v 1.154 1998/03/07 21:35:24 dyson Exp $ */ /* @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,9 @@ static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); +struct bio_ops bioops; /* I/O operation notification */ + +#if 0 /* replaced bu sched_sync */ static void vfs_update __P((void)); static struct proc *updateproc; static struct kproc_desc up_kp = { @@ -61,6 +65,7 @@ static struct kproc_desc up_kp = { &updateproc }; SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) +#endif struct buf *buf; /* buffer header pool */ struct swqueue bswlist; @@ -179,6 +184,7 @@ bufinit() bp->b_qindex = QUEUE_EMPTY; bp->b_vnbufs.le_next = NOLIST; bp->b_generation = 0; + LIST_INIT(&bp->b_dep); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } @@ -362,6 +368,9 @@ int bwrite(struct buf * bp) { int oldflags = bp->b_flags; + struct vnode *vp; + struct mount *mp; + if (bp->b_flags & B_INVAL) { brelse(bp); @@ -386,6 +395,23 @@ bwrite(struct buf * bp) curproc->p_stats->p_ru.ru_oublock++; VOP_STRATEGY(bp); + /* + * 
Collect statistics on synchronous and asynchronous writes. + * Writes to block devices are charged to their associated + * filesystem (if any). + */ + if ((vp = bp->b_vp) != NULL) { + if (vp->v_type == VBLK) + mp = vp->v_specmountpoint; + else + mp = vp->v_mount; + if (mp != NULL) + if ((oldflags & B_ASYNC) == 0) + mp->mnt_stat.f_syncwrites++; + else + mp->mnt_stat.f_asyncwrites++; + } + if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); @@ -420,6 +446,8 @@ vfs_bio_need_satisfy(void) { void bdwrite(struct buf * bp) { + int s; + struct vnode *vp; #if !defined(MAX_PERF) if ((bp->b_flags & B_BUSY) == 0) { @@ -438,7 +466,9 @@ bdwrite(struct buf * bp) bp->b_flags &= ~(B_READ|B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); ++numdirtybuffers; } @@ -470,12 +500,45 @@ bdwrite(struct buf * bp) vfs_clean_pages(bp); bqrelse(bp); + /* + * XXX The soft dependency code is not prepared to + * have I/O done when a bdwrite is requested. For + * now we just let the write be delayed if it is + * requested by the soft dependency code. + */ + if ((vp = bp->b_vp) && + (vp->v_type == VBLK && vp->v_specmountpoint && + (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))) + return; + if (numdirtybuffers >= hidirtybuffers) flushdirtybuffers(0, 0); return; } + +/* + * Same as first half of bdwrite, mark buffer dirty, but do not release it. + * Check how this compares with vfs_setdirty(); XXX [JRE] + */ +void +bdirty(bp) + struct buf *bp; +{ + int s; + + bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */ + if ((bp->b_flags & B_DELWRI) == 0) { + bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */ + s = splbio(); + reassignbuf(bp, bp->b_vp); + splx(s); + ++numdirtybuffers; + } +} + /* * Asynchronous write. * Start output on a buffer, but do not wait for it to complete. 
@@ -535,6 +598,8 @@ brelse(struct buf * bp) if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) + (*bioops.io_deallocate)(bp); if (bp->b_flags & B_DELWRI) --numdirtybuffers; bp->b_flags &= ~(B_DELWRI | B_CACHE); @@ -1065,6 +1130,9 @@ getnewbuf(struct vnode *vp, daddr_t blkno, crfree(bp->b_wcred); bp->b_wcred = NOCRED; } + if (LIST_FIRST(&bp->b_dep) != NULL && + bioops.io_deallocate) + (*bioops.io_deallocate)(bp); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); @@ -1083,6 +1151,8 @@ getnewbuf(struct vnode *vp, daddr_t blkno, bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_validoff = bp->b_validend = 0; bp->b_usecount = 5; + /* Here, not kern_physio.c, is where this should be done*/ + LIST_INIT(&bp->b_dep); maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK; @@ -1799,6 +1869,9 @@ biodone(register struct buf * bp) splx(s); return; } + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); + if (bp->b_flags & B_VMIO) { int i, resid; vm_ooffset_t foff; @@ -1944,6 +2017,7 @@ count_lock_queue() return (count); } +#if 0 /* not with kirks code */ static int vfs_update_interval = 30; static void @@ -1970,6 +2044,8 @@ sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW, &vfs_update_interval, 0, sysctl_kern_updateinterval, "I", ""); +#endif + /* * This routine is called in lieu of iodone in the case of diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 7f477bfd1b82..0022ac906eb4 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -33,7 +33,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.55 1998/02/06 12:13:30 eivind Exp $ + * $Id: vfs_cluster.c,v 1.56 1998/03/07 21:35:28 dyson Exp $ */ #include "opt_debug_cluster.h" @@ -399,6 +399,9 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) break; } } + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); for (j = 0; j < tbp->b_npages; j += 1) { @@ -684,7 +687,6 @@ cluster_wbuild(vp, size, start_lbn, len) (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); - for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { s = splbio(); @@ -714,7 +716,10 @@ cluster_wbuild(vp, size, start_lbn, len) tbp->b_flags &= ~B_DONE; splx(s); } - + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && + bioops.io_start) + (*bioops.io_start)(tbp); if (tbp->b_flags & B_VMIO) { vm_page_t m; diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 5d27cf57355f..972604d24b37 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* @@ -123,6 +123,19 @@ static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. 
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); @@ -147,6 +160,12 @@ vntblinit() simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* @@ -554,7 +573,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) @@ -688,16 +707,153 @@ brelvp(bp) /* * Delete from old vnode list, if on one. */ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); } +/* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, mounted on block devices + * are delayed only about a half the time that file data is delayed. 
+ * Similarly, directory updates are more critical, so are only delayed + * about a third the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syncer process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +} + +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) + +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired.
+ */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP. Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + /* * Associate a p-buffer with a vnode. 
*/ @@ -743,6 +899,8 @@ reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { @@ -765,18 +923,40 @@ reassignbuf(bp, newvp) if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); @@ -863,7 +1043,7 @@ checkalias(nvp, nvp_rdev, mp) nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { @@ -920,7 +1100,6 @@ vget(vp, flags, p) if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* @@ -1066,7 +1245,7 @@ vdrop(vp) simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); @@ -1790,7 +1969,7 @@ vfs_mountedon(vp) struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) 
return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); @@ -1798,7 +1977,7 @@ vfs_mountedon(vp) if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } @@ -2326,3 +2505,170 @@ vn_pollgone(vp) } simple_unlock(&vp->v_pollinfo.vpi_lock); } + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) + +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point. 
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +} + +/* + * The syncer vnode is no longer referenced.
+ */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); +} diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 596de9561183..4a818dc42939 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ @@ -283,6 +283,14 @@ mount(p, uap) mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } @@ -296,6 +304,8 @@ mount(p, uap) simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); @@ -431,12 +441,16 @@ dounmount(mp, flags, p) vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); if 
(((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); @@ -490,9 +504,9 @@ sync(p, uap) asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; @@ -665,10 +679,11 @@ getfsstat(p, uap) if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 5d27cf57355f..972604d24b37 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* @@ -123,6 +123,19 @@ static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. 
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); @@ -147,6 +160,12 @@ vntblinit() simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* @@ -554,7 +573,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) @@ -688,16 +707,153 @@ brelvp(bp) /* * Delete from old vnode list, if on one. */ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); } +/* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, mounted on block devices + * are delayed only about a half the time that file data is delayed. 
+ * Similarly, directory updates are more critical, so are only delayed + * about a third the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syncer process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +} + +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) + +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired.
+ */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP. Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + /* * Associate a p-buffer with a vnode. 
*/ @@ -743,6 +899,8 @@ reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { @@ -765,18 +923,40 @@ reassignbuf(bp, newvp) if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); @@ -863,7 +1043,7 @@ checkalias(nvp, nvp_rdev, mp) nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { @@ -920,7 +1100,6 @@ vget(vp, flags, p) if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* @@ -1066,7 +1245,7 @@ vdrop(vp) simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); @@ -1790,7 +1969,7 @@ vfs_mountedon(vp) struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) 
return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); @@ -1798,7 +1977,7 @@ vfs_mountedon(vp) if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } @@ -2326,3 +2505,170 @@ vn_pollgone(vp) } simple_unlock(&vp->v_pollinfo.vpi_lock); } + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) + +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point. 
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +} + +/* + * The syncer vnode is no longer referenced.
+ */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); +} diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 596de9561183..4a818dc42939 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ @@ -283,6 +283,14 @@ mount(p, uap) mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } @@ -296,6 +304,8 @@ mount(p, uap) simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); @@ -431,12 +441,16 @@ dounmount(mp, flags, p) vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); 
if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); @@ -490,9 +504,9 @@ sync(p, uap) asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; @@ -665,10 +679,11 @@ getfsstat(p, uap) if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index bedf27422475..922e0604b542 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -31,7 +31,7 @@ # SUCH DAMAGE. 
# # @(#)vnode_if.src 8.12 (Berkeley) 5/14/95 -# $Id: vnode_if.src,v 1.14 1997/10/16 10:48:00 phk Exp $ +# $Id: vnode_if.src,v 1.15 1997/10/16 20:32:23 phk Exp $ # # @@ -428,6 +428,18 @@ vop_advlock { IN int flags; }; +# +#% balloc vp L L L +# +vop_balloc { + IN struct vnode *vp; + IN off_t startoffset; + IN int size; + IN struct ucred *cred; + IN int flags; + OUT struct buf **bpp; +}; + # #% reallocblks vp L L L # diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 9c3c8450867d..666322f65609 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include @@ -548,8 +548,12 @@ spec_strategy(ap) struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } @@ -633,7 +637,9 @@ spec_close(ap) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 06a5cdd9bbe5..b4c6f7750458 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)specdev.h 8.6 (Berkeley) 5/21/95 - * $Id: specdev.h,v 1.12 1997/09/14 02:58:03 peter Exp $ + * $Id: specdev.h,v 1.13 1997/10/15 13:23:21 phk Exp $ */ /* @@ -42,7 +42,7 @@ struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; - long si_flags; + struct mount *si_mountpoint; dev_t si_rdev; }; /* @@ -51,12 +51,7 @@ struct specinfo { #define v_rdev v_specinfo->si_rdev #define v_hashchain v_specinfo->si_hashchain #define v_specnext v_specinfo->si_specnext -#define v_specflags v_specinfo->si_flags - -/* - * Flags for specinfo - */ -#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ +#define v_specmountpoint v_specinfo->si_mountpoint /* * Special device management diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c index 2b1d1d7f352f..0af5438dae63 100644 --- a/sys/msdosfs/msdosfs_vfsops.c +++ b/sys/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- @@ -772,7 +772,7 @@ mountmsdosfs(devvp, mp, p, argp) mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; @@ -818,7 +818,7 @@ msdosfs_unmount(mp, mntflags, p) if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; @@ -841,8 +841,9 @@ msdosfs_unmount(mp, mntflags, p) ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); @@ -946,9 +947,11 @@ msdosfs_sync(mp, waitfor, cred, p) simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? */ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index be2b42380f9e..c1f8bb246cf5 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ @@ -1206,10 +1206,14 @@ nfs_doio(bp, cr, p) */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 1021f4706aa1..9c08967b19bf 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ @@ -2846,9 +2846,11 @@ nfs_flush(vp, cred, waitfor, p, commit) vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positioning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } @@ -2994,6 +2996,7 @@ nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; @@ -3008,6 +3011,7 @@ nfs_writebp(bp, force) if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { @@ -3016,6 +3020,7 @@ nfs_writebp(bp, force) bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not @@ -3046,7 +3051,9 @@ nfs_writebp(bp, force) int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index be2b42380f9e..c1f8bb246cf5 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE.
* * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ @@ -1206,10 +1206,14 @@ nfs_doio(bp, cr, p) */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c index 1021f4706aa1..9c08967b19bf 100644 --- a/sys/nfsclient/nfs_vnops.c +++ b/sys/nfsclient/nfs_vnops.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ @@ -2846,9 +2846,11 @@ nfs_flush(vp, cred, waitfor, p, commit) vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positioning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } @@ -2994,6 +2996,7 @@ nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; @@ -3008,6 +3011,7 @@ nfs_writebp(bp, force) if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { @@ -3016,6 +3020,7 @@ nfs_writebp(bp, force) bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick.
If not @@ -3046,7 +3051,9 @@ nfs_writebp(bp, force) int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); diff --git a/sys/pci/if_de.c b/sys/pci/if_de.c index 1cfaeba29de5..e764252d396b 100644 --- a/sys/pci/if_de.c +++ b/sys/pci/if_de.c @@ -1,5 +1,7 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 47117edfd37d..f274dd62f91e 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ @@ -47,6 +47,24 @@ #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; + +/* + * To avoid including <ufs/ffs/softdep.h> + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; struct iodone_chain { long ic_prev_flags; @@ -104,6 +122,7 @@ struct buf { } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies.
*/ }; /* @@ -264,6 +283,7 @@ int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 47117edfd37d..f274dd62f91e 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ @@ -47,6 +47,24 @@ #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; + +/* + * To avoid including <ufs/ffs/softdep.h> + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; struct iodone_chain { long ic_prev_flags; @@ -104,6 +122,7 @@ struct buf { } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* @@ -264,6 +283,7 @@ int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 69d258c96ba5..a04c6881367f 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -31,12 +31,14 @@ * SUCH DAMAGE.
* * @(#)malloc.h 8.5 (Berkeley) 5/3/95 - * $Id: malloc.h,v 1.35 1997/12/05 19:14:36 bde Exp $ + * $Id: malloc.h,v 1.36 1997/12/27 09:42:03 bde Exp $ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ +#define splmem splhigh + #define KMEMSTATS /* @@ -165,7 +167,7 @@ struct kmembuckets { #else /* do not collect statistics */ #define MALLOC(space, cast, size, type, flags) do { \ register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \ - long s = splimp(); \ + long s = splmem(); \ if (kbp->kb_next == NULL) { \ (space) = (cast)malloc((u_long)(size), type, flags); \ } else { \ @@ -178,7 +180,7 @@ struct kmembuckets { #define FREE(addr, type) do { \ register struct kmembuckets *kbp; \ register struct kmemusage *kup = btokup(addr); \ - long s = splimp(); \ + long s = splmem(); \ if (1 << kup->ku_indx > MAXALLOCSAVE) { \ free((addr), type); \ } else { \ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 9230ae0979be..05eae825620d 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)mount.h 8.21 (Berkeley) 5/20/95 - * $Id: mount.h,v 1.56 1998/02/22 01:17:51 jkh Exp $ + * $Id: mount.h,v 1.57 1998/03/01 22:46:36 msmith Exp $ */ #ifndef _SYS_MOUNT_H_ @@ -79,7 +79,8 @@ struct statfs { uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem (see below) */ int f_flags; /* copy of mount exported flags */ - long f_spare[2]; /* spare for later */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ @@ -146,6 +147,7 @@ struct mount { struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ + struct vnode *mnt_syncer; /* syncer vnode */ struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags shared with user */ @@ -167,6 +169,7 @@ struct mount { #define MNT_UNION 0x00000020 /* union with underlying filesystem */ #define MNT_ASYNC 0x00000040 /* file system written asynchronously */ #define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */ +#define MNT_SOFTDEP 0x00200000 /* soft updates being done */ #define MNT_NOATIME 0x10000000 /* disable update of file access time */ #define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ #define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ @@ -203,7 +206,8 @@ struct mount { MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ - MNT_NOCLUSTERW | MNT_SUIDDIR/* | MNT_EXPUBLIC */) + MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP \ + /* | MNT_EXPUBLIC */) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. 
@@ -248,8 +252,8 @@ struct mount { * * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 -#define MNT_NOWAIT 2 +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* @@ -448,6 +452,7 @@ int vfs_export /* process mount export info */ int vfs_vrele __P((struct mount *, struct vnode *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct sockaddr *)); +int vfs_allocate_syncvnode __P((struct mount *)); void vfs_getnewfsid __P((struct mount *)); struct mount *vfs_getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_mountedon __P((struct vnode *)); /* is a vfs mounted on vp */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index aed0b4936a13..1e21926b95a2 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.66 1998/01/24 02:01:31 dyson Exp $ + * $Id: vnode.h,v 1.67 1998/03/07 21:36:27 dyson Exp $ */ #ifndef _SYS_VNODE_H_ @@ -61,7 +61,7 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS + VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS }; /* @@ -94,6 +94,7 @@ struct vnode { LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { @@ -154,7 +155,8 @@ struct vnode { #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being 
recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ -#define VTBFREE 0x100000 /* This vnode is no the to be freelist */ +#define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ +#define VONWORKLST 0x200000 /* On syncer work-list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value @@ -243,6 +245,7 @@ extern int vttoif_tab[]; #define VREF(vp) vref(vp) + #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else @@ -262,6 +265,8 @@ extern int vttoif_tab[]; */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ +extern time_t syncdelay; /* time to delay syncing vnodes */ +extern int rushjob; /* # of slots filesys_syncer should run ASAP */ extern struct vm_zone *namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ @@ -499,6 +504,7 @@ int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); +void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); int vfs_object_create __P((struct vnode *vp, struct proc *p, struct ucred *cred, int waslocked)); diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index f2e6a74febc5..8eab25a204a2 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 - * $Id: ffs_alloc.c,v 1.46 1998/02/04 22:33:27 eivind Exp $ + * $Id: ffs_alloc.c,v 1.47 1998/02/06 12:14:13 eivind Exp $ */ #include "opt_quota.h" @@ -57,7 +57,8 @@ typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref, int size)); static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); -static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t)); +static ufs_daddr_t + ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t)); #ifdef DIAGNOSTIC static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long)); #endif @@ -292,7 +293,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); - ffs_blkfree(ip, bprev, (long)osize); + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); @@ -455,8 +457,10 @@ ffs_reallocblks(ap) #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) + if (i == ssize) { bap = ebap; + soff = -i; + } #ifdef DIAGNOSTIC if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) @@ -468,6 +472,16 @@ ffs_reallocblks(ap) if (prtrealloc) printf(" %d,", *bap); #endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? 
sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } *bap++ = blkno; } /* @@ -509,8 +523,10 @@ ffs_reallocblks(ap) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DEBUG if (!ffs_checkblk(ip, @@ -847,6 +863,8 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) fs->fs_cs(fs, cg).cs_nffree--; } fs->fs_fmod = 1; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, bprev); bdwrite(bp); return (bprev); } @@ -868,7 +886,8 @@ ffs_alloccg(ip, cg, bpref, size) register struct cg *cgp; struct buf *bp; register int i; - int error, bno, frags, allocsiz; + ufs_daddr_t bno, blkno; + int allocsiz, error, frags; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) @@ -887,7 +906,7 @@ ffs_alloccg(ip, cg, bpref, size) } cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bdwrite(bp); return (bno); } @@ -909,7 +928,7 @@ ffs_alloccg(ip, cg, bpref, size) brelse(bp); return (0); } - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); @@ -936,8 +955,11 @@ ffs_alloccg(ip, cg, bpref, size) cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; + blkno = cg * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); bdwrite(bp); - return (cg * fs->fs_fpg + bno); + return ((u_long)blkno); } /* @@ -952,16 +974,20 @@ ffs_alloccg(ip, cg, bpref, size) * blocks may be fragmented by the routine that allocates them. 
*/ static ufs_daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; +ffs_alloccgblk(ip, bp, bpref) + struct inode *ip; + struct buf *bp; ufs_daddr_t bpref; { + struct fs *fs; + struct cg *cgp; ufs_daddr_t bno, blkno; int cylno, pos, delta; short *cylbp; register int i; + fs = ip->i_fs; + cgp = (struct cg *)bp->b_data; if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; goto norot; @@ -1052,7 +1078,10 @@ ffs_alloccgblk(fs, cgp, bpref) cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); + blkno = cgp->cg_cgx * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + return (blkno); } #ifdef notyet @@ -1155,7 +1184,7 @@ ffs_clusteralloc(ip, cg, bpref, len) panic("ffs_clusteralloc: allocated out of group"); len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i) + if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); @@ -1234,6 +1263,8 @@ ffs_nodealloccg(ip, cg, ipref, mode) panic("ffs_nodealloccg: block not in map"); /* NOTREACHED */ gotit: + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; @@ -1268,9 +1299,10 @@ ffs_blkfree(ip, bno, size) int i, error, cg, blk, frags, bbase; fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n", - (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { + printf("dev=0x%lx, bno = %d, bsize = %d, size = %ld, fs = %s\n", + (u_long)ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); 
} cg = dtog(fs, bno); @@ -1294,7 +1326,7 @@ ffs_blkfree(ip, bno, size) bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%lx, block = %ld, fs = %s\n", (u_long) ip->i_dev, bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); @@ -1404,11 +1436,26 @@ ffs_checkblk(ip, bno, size) /* * Free an inode. - * - * The specified inode is placed back in the free map. */ int -ffs_vfree(pvp, ino, mode) +ffs_vfree( pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ + if (DOINGSOFTDEP(pvp)) { + softdep_freefile(pvp, ino, mode); + return (0); + } + return (ffs_freefile(pvp, ino, mode)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. + */ + int + ffs_freefile( pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; @@ -1429,7 +1476,7 @@ ffs_vfree(pvp, ino, mode) (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); - return (0); + return (error); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 60d20376d1b6..cf253b72e6e4 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -31,13 +31,14 @@ * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - * $Id: ffs_balloc.c,v 1.18 1998/02/04 22:33:31 eivind Exp $ + * $Id: ffs_balloc.c,v 1.19 1998/02/06 12:14:14 eivind Exp $ */ #include #include #include #include +#include #include #include @@ -53,16 +54,23 @@ * the inode and the logical block number in a file. 
*/ int -ffs_balloc(ip, lbn, size, cred, bpp, flags) +ffs_balloc(ap) + struct vop_balloc_args /* { + struct inode *a_ip; + ufs_daddr_t a_lbn; + int a_size; + struct ucred *a_cred; + int a_flags; + struct buf *a_bpp; + } */ *ap; +{ register struct inode *ip; register ufs_daddr_t lbn; int size; struct ucred *cred; - struct buf **bpp; int flags; -{ - register struct fs *fs; - register ufs_daddr_t nb; + struct fs *fs; + ufs_daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; @@ -70,10 +78,18 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - *bpp = NULL; + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + lbn = lblkno(fs, ap->a_startoffset); + size = blkoff(fs, ap->a_startoffset) + ap->a_size; + if (size > fs->fs_bsize) + panic("ffs_balloc: blk too big"); + *ap->a_bpp = NULL; if (lbn < 0) return (EFBIG); - fs = ip->i_fs; + cred = ap->a_cred; + flags = ap->a_flags; /* * If the next write will extend the file into a new block, @@ -89,6 +105,10 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), ip->i_db[nb], + fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -110,7 +130,7 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) return (error); } bp->b_blkno = fsbtodb(fs, nb); - *bpp = bp; + *ap->a_bpp = bp; return (0); } if (nb != 0) { @@ -132,6 +152,10 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) @@ -147,10 +171,13 @@ ffs_balloc(ip, lbn, size, cred, 
bpp, flags) bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; + *ap->a_bpp = bp; return (0); } /* @@ -180,12 +207,18 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(bp)) - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) + goto fail; + } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -221,13 +254,19 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(nbp)) { - brelse(bp); - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if (error = bwrite(nbp)) { + brelse(bp); + goto fail; + } } bap[indirs[i - 1].in_off] = nb; /* @@ -259,6 +298,9 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use @@ -271,7 +313,7 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); } brelse(bp); @@ -285,7 +327,7 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); fail: /* diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index dad97d3d426b..90cb38c02a11 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - * $Id: ffs_extern.h,v 1.21 1997/11/22 08:35:45 bde Exp $ + * $Id: ffs_extern.h,v 1.22 1998/02/03 21:52:00 bde Exp $ */ #ifndef _UFS_FFS_EXTERN_H @@ -68,8 +68,7 @@ struct vop_reallocblks_args; int ffs_alloc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *)); -int ffs_balloc __P((struct inode *, - ufs_daddr_t, int, struct ucred *, struct buf **, int)); +int ffs_balloc __P((struct vop_balloc_args *)); int ffs_blkatoff __P((struct vnode *, off_t, char **, struct buf **)); void ffs_blkfree __P((struct inode *, ufs_daddr_t, long)); ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *)); @@ -79,7 +78,9 @@ int ffs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); int ffs_flushfiles __P((struct mount *, int, struct proc *)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); +int ffs_freefile __P(( struct vnode *, ino_t, int )); int 
ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); +int ffs_isfreeblock __P((struct fs *, unsigned char *, ufs_daddr_t)); int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *, struct malloc_type *)); int ffs_mountroot __P((void)); @@ -102,4 +103,31 @@ extern vop_t **ffs_vnodeop_p; extern vop_t **ffs_specop_p; extern vop_t **ffs_fifoop_p; +/* + * Soft update function prototypes. + */ +void softdep_initialize __P((void)); +int softdep_process_worklist __P((struct mount *)); +int softdep_mount __P((struct vnode *, struct mount *, struct fs *, + struct ucred *)); +int softdep_flushfiles __P((struct mount *, int, struct proc *)); +void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); +void softdep_load_inodeblock __P((struct inode *)); +int softdep_fsync __P((struct vnode *)); +void softdep_freefile __P((struct vnode *, ino_t, int)); +void softdep_setup_freeblocks __P((struct inode *, off_t)); +void softdep_deallocate_dependencies __P((struct buf *)); +void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t)); +void softdep_setup_blkmapdep __P((struct buf *, struct fs *, ufs_daddr_t)); +void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, ufs_daddr_t, + ufs_daddr_t, long, long, struct buf *)); +void softdep_setup_allocindir_meta __P((struct buf *, struct inode *, + struct buf *, int, ufs_daddr_t)); +void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, + struct buf *, int, ufs_daddr_t, ufs_daddr_t, struct buf *)); +void softdep_disk_io_initiation __P((struct buf *)); +void softdep_disk_write_complete __P((struct buf *)); +int softdep_sync_metadata __P((struct vop_fsync_args *)); + #endif /* !_UFS_FFS_EXTERN_H */ + diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index cf1c043bd9bd..d1364a19b0e4 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - * $Id: ffs_inode.c,v 1.34 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_inode.c,v 1.35 1998/03/07 21:36:33 dyson Exp $ */ #include "opt_quota.h" @@ -87,8 +87,9 @@ ffs_update(vp, access, modify, waitfor) ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); } - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + if (((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && + (waitfor != MNT_WAIT)) return (0); /* * Use a copy of the current time to get consistent timestamps @@ -129,11 +130,15 @@ ffs_update(vp, access, modify, waitfor) brelse(bp); return (error); } + if (DOINGSOFTDEP(vp)) + softdep_update_inodeblock(ip, bp, waitfor); + else if (ip->i_effnlink != ip->i_nlink) + panic("ffs_update: bad link cnt"); *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; - if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { return (bwrite(bp)); - else { + } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); @@ -171,6 +176,8 @@ ffs_truncate(vp, length, flags, cred, p) off_t osize; oip = VTOI(ovp); + if (oip->i_size == length) + return (0); fs = oip->i_fs; if (length < 0) return (EINVAL); @@ -197,6 +204,31 @@ ffs_truncate(vp, length, flags, cred, p) if (error) return (error); #endif + ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; + if (DOINGSOFTDEP(ovp)) { + if (length > 0) { + /* + * If a file is only partially truncated, then + * we have to clean up the data structures + * describing the allocation past the truncation + * point. Finding and deallocating those structures + * is a lot of work. Since partial truncation occurs + * rarely, we solve the problem by syncing the file + * so that it will have no data structures left. 
+ */ + if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, + p)) != 0) + return (error); + } else { +#ifdef QUOTA + (void) chkdq(oip, -oip->i_blocks, NOCRED, 0); +#endif + softdep_setup_freeblocks(oip, length); + (void) vinvalbuf(ovp, 0, cred, p, 0, 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (ffs_update(ovp, &tv, &tv, 0)); + } + } osize = oip->i_size; /* * Lengthen the size of the file. We must ensure that the @@ -205,13 +237,15 @@ ffs_truncate(vp, length, flags, cred, p) */ if (osize < length) { vnode_pager_setsize(ovp, length); +#if 0 offset = blkoff(fs, length - 1); lbn = lblkno(fs, length - 1); +#endif aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset + 1, cred, - &bp, aflags); + error = VOP_BALLOC(ovp, length - 1, 1, + cred, aflags, &bp); if (error) return (error); oip->i_size = length; @@ -241,9 +275,13 @@ ffs_truncate(vp, length, flags, cred, p) aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags); - if (error) + error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp); + if (error) { +#if 0 /* kirk's version had this */ + vnode_pager_setsize(ovp, (u_long)osize); +#endif return (error); + } oip->i_size = length; size = blksize(fs, oip, lbn); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index 601a4cf7ba19..7d7de141dfbe 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 - * $Id: ffs_subr.c,v 1.18 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_subr.c,v 1.19 1998/02/13 00:20:36 bde Exp $ */ #include @@ -190,6 +190,30 @@ ffs_isblock(fs, cp, h) } } +/* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + ufs_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: + panic("ffs_isfreeblock"); + } +} + /* * take a block out of the map */ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index edfe0e696f10..71a451dd38ec 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - * $Id: ffs_vfsops.c,v 1.74 1998/03/07 14:59:44 bde Exp $ + * $Id: ffs_vfsops.c,v 1.75 1998/03/07 21:36:36 dyson Exp $ */ #include "opt_quota.h" @@ -203,7 +203,11 @@ ffs_mount( mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - err = ffs_flushfiles(mp, flags, p); + if (mp->mnt_flag & MNT_SOFTDEP) { + err = softdep_flushfiles(mp, flags, p); + } else { + err = ffs_flushfiles(mp, flags, p); + } } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); @@ -410,7 +414,10 @@ ffs_reload(mp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ffs_reload: dirty1"); dev = devvp->v_rdev; @@ -516,6 +523,7 @@ ffs_reload(mp, cred, p) } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); + ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); simple_lock(&mntvnode_slock); @@ -537,10 +545,12 @@ ffs_mountfs(devvp, mp, p, malloctype) register struct ufsmount *ump; struct buf *bp; register struct fs *fs; + struct cg *cgp; dev_t dev; struct partinfo dpart; + struct csum cstotal; caddr_t base, space; - int error, i, blks, size, ronly; + int error, i, cyl, blks, size, ronly; int32_t *lp; struct ucred *cred; u_int64_t maxfilesize; /* XXX */ @@ -562,7 +572,10 @@ ffs_mountfs(devvp, mp, p, malloctype) if (ncount > 1 && devvp != rootvp) return (EBUSY); - if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) return (error); /* @@ -674,7 +687,7 @@ ffs_mountfs(devvp, mp, p, malloctype) ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); /* @@ -700,11 +713,17 @@ ffs_mountfs(devvp, mp, p, malloctype) if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ if (ronly == 0) { + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(base, M_UFSMNT); + goto out; + } fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } return (0); out: + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, cred, p); @@ -765,9 +784,13 @@ ffs_unmount(mp, mntflags, p) if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } - error = ffs_flushfiles(mp, flags, p); - if (error) - return (error); + if (mp->mnt_flag & MNT_SOFTDEP) { + if ((error = softdep_flushfiles(mp, flags, p)) != 0) + return (error); + } else { + if ((error = ffs_flushfiles(mp, flags, p)) != 0) + return (error); + } ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0) { @@ -778,7 +801,7 @@ ffs_unmount(mp, mntflags, p) return (error); } } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + ump->um_devvp->v_specmountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, @@ -824,7 +847,17 @@ ffs_flushfiles(mp, flags, p) */ } #endif - error = vflush(mp, NULLVP, flags); + /* + * Flush all the files. + */ + if ((error = vflush(mp, NULL, flags)) != 0) + return (error); + /* + * Flush filesystem metadata. + */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); + VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } @@ -903,9 +936,9 @@ ffs_sync(mp, waitfor, cred, p) simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if (((ip->i_flag & + if ((vp->v_type == VNON) || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && - vp->v_dirtyblkhd.lh_first == NULL) { + ((vp->v_dirtyblkhd.lh_first == NULL) || (waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } @@ -937,21 +970,22 @@ ffs_sync(mp, waitfor, cred, p) /* * Force stale file system control information to be flushed. 
*/ - error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p); - if (error) - allerror = error; + if (waitfor != MNT_LAZY) { + if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) + waitfor = MNT_NOWAIT; + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0, p); + } #ifdef QUOTA qsync(mp); #endif /* * Write back modified superblock. */ - if (fs->fs_fmod != 0) { - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - if (error = ffs_sbupdate(ump, waitfor)) - allerror = error; - } + if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + allerror = error; return (allerror); } @@ -1060,6 +1094,10 @@ ffs_vget(mp, ino, vpp) return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* @@ -1157,6 +1195,7 @@ ffs_init(vfsp) struct vfsconf *vfsp; { + softdep_initialize(); return (ufs_init(vfsp)); } @@ -1200,6 +1239,8 @@ ffs_sbupdate(mp, waitfor) if (allerror) return (allerror); bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ dfs = (struct fs *)bp->b_data; /* XXX */ diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 0210d61458ee..44db8f4ac970 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.42 1998/02/06 12:14:16 eivind Exp $ + * $Id: ffs_vnops.c,v 1.43 1998/02/26 06:39:38 msmith Exp $ */ #include @@ -74,6 +74,7 @@ static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_getpages_desc, (vop_t *) ffs_getpages }, { &vop_putpages_desc, (vop_t *) ffs_putpages }, { &vop_read_desc, (vop_t *) ffs_read }, + { &vop_balloc_desc, (vop_t *) ffs_balloc }, { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, { &vop_write_desc, (vop_t *) ffs_write }, { NULL, NULL } @@ -120,12 +121,11 @@ ffs_fsync(ap) struct proc *a_p; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; + struct vnode *vp = ap->a_vp; + struct buf *bp; struct timeval tv; struct buf *nbp; - int pass; - int s; + int s, error, passes, skipmeta; daddr_t lbn; @@ -137,31 +137,45 @@ ffs_fsync(ap) lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); } - pass = 0; /* * Flush all dirty buffers associated with a vnode. */ + passes = NIADDR; + skipmeta = 0; + if (ap->a_waitfor == MNT_WAIT) + skipmeta = 1; loop: s = splbio(); +loop2: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_lblkno < 0))) + /* + * First time through on a synchronous call, + * or if it's already scheduled, skip to the next + * buffer + */ + if ((bp->b_flags & B_BUSY) || + ((skipmeta == 1) && (bp->b_lblkno < 0))) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - - if (((bp->b_vp != vp) || (ap->a_waitfor != MNT_NOWAIT)) || - ((vp->v_type != VREG) && (vp->v_type != VBLK))) { - + /* + * If data is outstanding to another vnode, or we were + * asked to wait for everything, or it's not a file or BDEV, + * start the IO on this buffer immediatly. 
+ */ + if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || + ((vp->v_type != VREG) && (vp->v_type != VBLK))) { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); /* - * Wait for I/O associated with indirect blocks to complete, - * since there is no way to quickly wait for them below. + * Wait for I/O associated with indirect blocks to + * complete, since there is no way to quickly wait + * for them below. */ - if ((bp->b_vp == vp) && (ap->a_waitfor == MNT_NOWAIT)) { + if ((bp->b_vp == vp) || (ap->a_waitfor != MNT_WAIT)) { if (bp->b_flags & B_CLUSTEROK) { bdwrite(bp); (void) vfs_bio_awrite(bp); @@ -171,26 +185,30 @@ ffs_fsync(ap) } else { (void) bwrite(bp); } - } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { - + /* + * If the buffer is for data that has been truncated + * off the file, then throw it away. + */ bremfree(bp); bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE; brelse(bp); splx(s); - } else { vfs_bio_awrite(bp); splx(s); } goto loop; } - splx(s); - - if (pass == 0) { - pass = 1; - goto loop; + /* + * If we were asked to do this synchronously, then go back for + * another pass, this time doing the metadata. + */ + if (skipmeta) { + skipmeta = 0; + goto loop2; /* stay within the splbio() */ } + splx(s); if (ap->a_waitfor == MNT_WAIT) { s = splbio(); @@ -198,15 +216,38 @@ ffs_fsync(ap) vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "ffsfsn", 0); } + /* + * Ensure that any filesystem metatdata associated + * with the vnode has been written. + */ splx(s); -#ifdef DIAGNOSTIC + if ((error = softdep_sync_metadata(ap)) != 0) + return (error); + s = splbio(); if (vp->v_dirtyblkhd.lh_first) { - vprint("ffs_fsync: dirty", vp); - goto loop; - } + /* + * Block devices associated with filesystems may + * have new I/O requests posted for them even if + * the vnode is locked, so no amount of trying will + * get them clean. Thus we give block devices a + * good effort, then just give up. 
For all other file + * types, go around and try again until it is clean. + */ + if (passes > 0) { + passes -= 1; + goto loop2; + } +#ifdef DIAGNOSTIC + if (vp->v_type != VBLK) + vprint("ffs_fsync: dirty", vp); #endif + } } - gettime(&tv); - return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); + error = UFS_UPDATE(ap->a_vp, &tv, &tv, (ap->a_waitfor == MNT_WAIT)); + if (error) + return (error); + if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT) + error = softdep_fsync(vp); + return (error); } diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index 98a9b06698d9..9f997318f997 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)fs.h 8.13 (Berkeley) 3/21/95 - * $Id: fs.h,v 1.11 1997/03/23 20:08:22 guido Exp $ + * $Id: fs.h,v 1.12 1997/03/24 03:19:37 bde Exp $ */ #ifndef _UFS_FFS_FS_H_ @@ -222,7 +222,7 @@ struct fs { int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* file system is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ + int8_t fs_flags; /* see FS_ flags below */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ @@ -254,12 +254,19 @@ struct fs { #define FS_OKAY 0x7c269d38 /* superblock checksum */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ + /* * Preference for optimization. */ #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ +/* + * Filesystem flags. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ + /* * Rotational layout table format types */ @@ -485,6 +492,11 @@ struct ocg { (((lbn) >= NDADDR || (dip)->di_size >= smalllblktosize(fs, (lbn) + 1)) \ ? 
(fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + /* * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index f2fd0f25fa5e..4bd1cf5d7de1 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ @@ -45,6 +45,11 @@ #include #include +/* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + /* * This must agree with the definition in . */ @@ -67,6 +72,7 @@ struct inode { u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ @@ -160,6 +166,9 @@ struct indir { } \ } +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) + /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 5d7ec5f07a80..55d068c7e14a 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 - * $Id: ufs_extern.h,v 1.21 1997/10/16 11:59:09 phk Exp $ + * $Id: ufs_extern.h,v 1.22 1997/10/27 12:50:57 bde Exp $ */ #ifndef _UFS_UFS_EXTERN_H_ @@ -68,12 +68,12 @@ int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); void ufs_dirbad __P((struct inode *, doff_t, char *)); int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); -int ufs_direnter __P((struct inode *, struct vnode *,struct componentname *)); -int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *, - struct proc *)); -int ufs_dirremove __P((struct vnode *, struct componentname*)); -int ufs_dirrewrite - __P((struct inode *, struct inode *, struct componentname *)); +void ufs_makedirentry __P((struct inode *, struct componentname *, + struct direct *)); +int ufs_direnter __P((struct vnode *, struct vnode *, struct direct *, + struct componentname *, struct buf *)); +int ufs_dirremove __P((struct vnode *, struct inode *, int, int)); +int ufs_dirrewrite __P((struct inode *, struct inode *, ino_t, int, int)); int ufs_getlbns __P((struct vnode *, ufs_daddr_t, struct indir *, int *)); struct vnode * ufs_ihashget __P((dev_t, ino_t)); @@ -90,4 +90,17 @@ int ufs_root __P((struct mount *, struct vnode **)); int ufs_start __P((struct mount *, int, struct proc *)); int ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **)); +/* + * Soft update function prototypes. 
+ */ +void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *)); +void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, + caddr_t, caddr_t, int)); +void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, + int)); +void softdep_setup_directory_change __P((struct buf *, struct inode *, + struct inode *, long, int)); +void softdep_increase_linkcnt __P((struct inode *)); + #endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 108880783524..2e7d9d9cd67b 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -36,16 +36,22 @@ * SUCH DAMAGE. * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 - * $Id: ufs_lookup.c,v 1.20 1998/02/04 22:33:36 eivind Exp $ + * $Id: ufs_lookup.c,v 1.21 1998/02/06 12:14:18 eivind Exp $ */ #include #include +#include #include #include +#include +#include #include #include +#include +#include + #include #include #include @@ -143,7 +149,12 @@ ufs_lookup(ap) bp = NULL; slotoffset = -1; +/* + * XXX there was a soft-update diff about this I couldn't merge. + * I think this was the equiv. + */ *vpp = NULL; + vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & LOCKPARENT; @@ -331,7 +342,7 @@ ufs_lookup(ap) (nameiop == DELETE && (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_nlink != 0) { + (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. @@ -603,64 +614,66 @@ ufs_dirbadentry(dp, ep, entryoffsetinblock) } /* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. 
Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. */ -int -ufs_direnter(ip, dvp, cnp) +void +ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; + struct componentname *cnp; + struct direct *newdirp; { - register struct inode *dp; - struct direct newdir; #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_direnter: missing name"); + panic("ufs_makedirentry: missing name"); #endif - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (!OFSFMT(dvp)) - newdir.d_type = IFTODT(ip->i_mode); + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_mode); else { - newdir.d_type = 0; + newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } # endif } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); } /* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. 
Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). */ int -ufs_direnter2(dvp, dirp, cr, p) +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) struct vnode *dvp; + struct vnode *tvp; struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; +{ struct ucred *cr; struct proc *p; -{ int newentrysize; struct inode *dp; struct buf *bp; - struct iovec aiov; - struct uio auio; u_int dsize; struct direct *ep, *nep; - int error, loc, spacefree; + int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; + p = curproc; /* XXX */ + cr = p->p_ucred; + dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); @@ -672,36 +685,55 @@ ufs_direnter2(dvp, dirp, cr, p) * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; - dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_size = roundup2(dp->i_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; + panic("ufs_direnter: newblk"); + flags = B_CLRBUF; + if (!DOINGSOFTDEP(dvp)) + flags |= B_SYNC; + if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); } + dp->i_size = dp->i_offset + DIRBLKSIZ; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, (u_long)dp->i_size); + dirp->d_reclen = DIRBLKSIZ; + blkoff = dp->i_offset & + 
(VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); + } + ret = UFS_UPDATE(dvp, &time, &time, !DOINGSOFTDEP(dvp)); + if (error == 0) + return (ret); return (error); } /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. */ /* @@ -717,14 +749,16 @@ ufs_direnter2(dvp, dirp, cr, p) * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) + if (error) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); return (error); + } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. 
If it does not, then namei * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. + * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = DIRSIZ(OFSFMT(dvp), ep); @@ -742,7 +776,11 @@ ufs_direnter2(dvp, dirp, cr, p) dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), @@ -752,26 +790,44 @@ ufs_direnter2(dvp, dirp, cr, p) (ep->d_ino == WINO && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); + panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); + panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); - if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + if (DOINGSOFTDEP(dvp)) { + softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) - error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + /* + * If all went well, and the directory can be shortened, proceed + * with the truncation. 
Note that we have to unlock the inode for + * the entry that we just entered, as the truncation may need to + * lock other inodes which can lead to deadlock if we also hold a + * lock on the newly entered node. + */ + if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (tvp != NULL) + VOP_UNLOCK(tvp, 0, p); + (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + } return (error); } @@ -788,18 +844,20 @@ ufs_direnter2(dvp, dirp, cr, p) * to the size of the previous entry. */ int -ufs_dirremove(dvp, cnp) +ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; - struct componentname *cnp; + struct inode *ip; + int flags; + int isrmdir; { - register struct inode *dp; + struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); - if (cnp->cn_flags & DOWHITEOUT) { + if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. */ @@ -808,24 +866,44 @@ ufs_dirremove(dvp, cnp) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; - error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + goto out; } + if ((error = UFS_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ +#if 0 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); +#endif ep->d_ino = 0; - error = bowrite(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += dp->i_reclen; } +out: + if (ip) { + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + } + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + bdwrite(bp); + } else { + if (ip) + ip->i_nlink--; + error = bowrite(bp); /* maybe this should be as below? */ + } +#if 0 /* * Collapse new free space into previous entry. 
*/ @@ -840,6 +918,7 @@ ufs_dirremove(dvp, cnp) } else { error = bowrite(bp); } +#endif dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } @@ -850,9 +929,11 @@ ufs_dirremove(dvp, cnp) * set up by a call to namei. */ int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; { struct buf *bp; struct direct *ep; @@ -862,14 +943,22 @@ ufs_dirrewrite(dp, ip, cnp) error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); - ep->d_ino = ip->i_number; + ep->d_ino = newinum; if (!OFSFMT(vdp)) - ep->d_type = IFTODT(ip->i_mode); - if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + ep->d_type = newtype; + oip->i_effnlink--; + oip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + oip->i_nlink--; + if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); @@ -929,7 +1018,7 @@ ufs_dirempty(ip, parentino, cred) * 1 implies ".", 2 implies ".." if second * char is also "." */ - if (namlen == 1) + if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index da4641de9cf8..2eae865431c2 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 - * $Id: ufs_quota.c,v 1.18 1998/02/06 12:14:18 eivind Exp $ + * $Id: ufs_quota.c,v 1.19 1998/02/09 06:11:12 eivind Exp $ */ #include @@ -425,7 +425,7 @@ quotaon(p, mp, type, fname) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) + if (vp->v_type == VNON || vp->v_writecount == 0) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; @@ -470,6 +470,8 @@ quotaoff(p, mp, type) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; ip = VTOI(vp); @@ -657,6 +659,8 @@ qsync(mp) if (vp->v_mount != mp) goto again; nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 6abb130c3527..e0a3488c42fa 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 - * $Id: ufs_readwrite.c,v 1.43 1998/02/26 06:39:50 msmith Exp $ + * $Id: ufs_readwrite.c,v 1.44 1998/03/07 21:36:42 dyson Exp $ */ #define BLKSIZE(a, b, c) blksize(a, b, c) @@ -338,10 +338,10 @@ WRITE(ap) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); - if (error) +/* XXX is uio->uio_offset the right thing here? */ + error = VOP_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp); + if (error != 0) break; if (uio->uio_offset + xfersize > ip->i_size) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 28eae1419315..82a7cc34d95d 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - * $Id: ufs_vnops.c,v 1.77 1998/02/06 12:14:19 eivind Exp $ + * $Id: ufs_vnops.c,v 1.78 1998/02/09 06:11:14 eivind Exp $ */ #include "opt_quota.h" @@ -59,6 +59,8 @@ #include #include +#include +#include #include #include @@ -120,6 +122,18 @@ union _qcvt { (q) = tmp.qcvt; \ } +/* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + /* * Create a regular file */ @@ -273,6 +287,8 @@ ufs_access(ap) return (error); #endif break; + default: + break; } } @@ -340,7 +356,7 @@ ufs_getattr(ap) vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; - vap->va_nlink = ip->i_nlink; + vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = (dev_t)ip->i_rdev; @@ -444,6 +460,8 @@ ufs_setattr(ap) if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; + default: + break; } if (error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) return (error); @@ -465,7 +483,7 @@ ufs_setattr(ap) atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; mtimeval.tv_sec = vap->va_mtime.tv_sec; mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - error = UFS_UPDATE(vp, &atimeval, &mtimeval, 1); + error = UFS_UPDATE(vp, &atimeval, &mtimeval, 0); if (error) return (error); } @@ -652,11 +670,7 @@ ufs_remove(ap) error = EPERM; goto out; } - error = ufs_dirremove(dvp, ap->a_cnp); - if (error == 0) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - } + error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(dvp, POLLWRITE); out: @@ -685,6 +699,7 @@ ufs_link(ap) struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; + struct direct newdir; int error; #ifdef DIAGNOSTIC @@ -711,15 +726,20 @@ ufs_link(ap) error = EPERM; goto out1; } + 
ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_increase_linkcnt(ip); gettime(&tv); - error = UFS_UPDATE(vp, &tv, &tv, 1); + error = UFS_UPDATE(vp, &tv, &tv, !DOINGSOFTDEP(vp)); if (!error) { - error = ufs_direnter(ip, tdvp, cnp); + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; } @@ -770,7 +790,7 @@ ufs_whiteout(ap) newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: @@ -781,8 +801,10 @@ ufs_whiteout(ap) #endif cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; + default: + panic("ufs_whiteout: unknown op"); } if (cnp->cn_flags & HASBUF) { zfree(namei_zone, cnp->cn_pnbuf); @@ -834,11 +856,10 @@ ufs_rename(ap) struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; + struct direct newdir; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; - u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || @@ -965,10 +986,13 @@ ufs_rename(ap) * completing our work, the link count * may be wrong, but correctable. 
*/ + ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_increase_linkcnt(ip); gettime(&tv); - if (error = UFS_UPDATE(fvp, &tv, &tv, 1)) { + if (error = UFS_UPDATE(fvp, &tv, &tv, !DOINGSOFTDEP(fvp))) { VOP_UNLOCK(fvp, 0, p); goto bad; } @@ -1027,15 +1051,20 @@ ufs_rename(ap) error = EMLINK; goto bad; } + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(tdvp, &tv, &tv, 1); + if (DOINGSOFTDEP(tdvp)) + softdep_increase_linkcnt(dp); + error = UFS_UPDATE(tdvp, &tv, &tv, !DOINGSOFTDEP(tdvp)); if (error) goto bad; } - error = ufs_direnter(ip, tdvp, tcnp); + ufs_makedirentry(ip, tcnp, &newdir); + error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) { if (doingdirectory && newparent) { + dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); @@ -1070,9 +1099,8 @@ ufs_rename(ap) * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { - if (! ufs_dirempty - (xp, dp->i_number, tcnp->cn_cred) || - xp->i_nlink > 2) { + if ((xp->i_effnlink > 2) || + !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } @@ -1085,40 +1113,37 @@ ufs_rename(ap) error = EISDIR; goto bad; } - error = ufs_dirrewrite(dp, ip, tcnp); + error = ufs_dirrewrite(dp, xp, ip->i_number, + IFTODT(ip->i_mode), doingdirectory); if (error) goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_nlink--; + if (doingdirectory) { + dp->i_effnlink--; dp->i_flag |= IN_CHANGE; + xp->i_effnlink--; + xp->i_flag |= IN_CHANGE; } VN_POLLEVENT(tdvp, POLLWRITE); - vput(tdvp); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. 
We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_nlink--; - if (doingdirectory) { - if (--xp->i_nlink != 0) - panic("ufs_rename: linked directory"); - error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, - tcnp->cn_cred, tcnp->cn_proc); + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * Truncate inode. The only stuff left in the directory + * is "." and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. + */ + dp->i_nlink--; + xp->i_nlink--; + if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc)) != 0) + goto bad; } - xp->i_flag |= IN_CHANGE; - VN_POLLEVENT(tvp, POLLNLINK); + vput(tdvp); + VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */ vput(tvp); xp = NULL; } @@ -1151,10 +1176,9 @@ ufs_rename(ap) * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IN_RENAME flag ensures that it cannot be moved by another - * rename. + * is a directory then it cannot have been rmdir'ed; the IN_RENAME + * flag ensures that it cannot be moved by another rename or removed + * by a rmdir. */ if (xp != ip) { if (doingdirectory) @@ -1167,44 +1191,11 @@ ufs_rename(ap) * and ".." set to point to the new parent. 
*/ if (doingdirectory && newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' || - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } - } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_nlink--; - xp->i_flag |= IN_CHANGE; + xp->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); + cache_purge(fdvp); } + error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) @@ -1222,6 +1213,7 @@ ufs_rename(ap) if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; @@ -1231,18 +1223,6 @@ ufs_rename(ap) return (error); } -/* - * A virgin directory (no blushing please). 
- */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, DT_DIR, 2, { '.', '.', 0 } -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, 2, { '.', '.', 0 } -}; - /* * Mkdir system call */ @@ -1260,7 +1240,9 @@ ufs_mkdir(ap) register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; + struct buf *bp; struct dirtemplate dirtemplate, *dtp; + struct direct newdir; struct timeval tv; int error, dmode; @@ -1348,25 +1330,31 @@ ufs_mkdir(ap) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 2; ip->i_nlink = 2; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; - gettime(&tv); - error = UFS_UPDATE(tvp, &tv, &tv, 1); /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is created so cleanup is + * possible if we crash. */ + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(dvp, &tv, &tv, 1); + if (DOINGSOFTDEP(dvp)) + softdep_increase_linkcnt(dp); + gettime(&tv); + error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(dvp)); if (error) goto bad; - /* Initialize directory with "." and ".." from static template. */ + /* + * Initialize directory with "." and ".." from static template. 
+ */ if (dvp->v_mount->mnt_maxsymlinklen > 0 ) dtp = &mastertemplate; @@ -1375,39 +1363,50 @@ ufs_mkdir(ap) dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; + if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + B_CLRBUF, &bp)) != 0) + goto bad; + ip->i_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(tvp, (u_long)ip->i_size); + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if ((error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp))) != 0) { + (void)VOP_BWRITE(bp); goto bad; } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_size = DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; - } - - /* Directory set up, now install it's entry in the parent directory. */ - error = ufs_direnter(ip, dvp, cnp); - if (error) { + VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */ + /* + * Directory set up, now install its entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent.
+ */ + if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; - } - VN_POLLEVENT(dvp, POLLWRITE); -bad: - /* - * No need to do an explicit VOP_TRUNCATE here, vrele will do this - * for us because we set the link count to 0. - */ - if (error) { + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. + */ + ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); - } else - *ap->a_vpp = tvp; + } out: zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); @@ -1435,14 +1434,17 @@ ufs_rmdir(ap) dp = VTOI(dvp); /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. */ error = 0; - if (ip->i_nlink != 2 || + if (ip->i_flag & IN_RENAME) { + error = EINVAL; + goto out; + } + if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1457,34 +1459,36 @@ ufs_rmdir(ap) * inode. If we crash in between, the directory * will be reattached to lost+found, */ - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) goto out; VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; /* - * Truncate inode. The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." 
reference - * has already been adjusted above. We've - * removed the "." reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. + * Truncate inode. The only stuff left in the directory is "." and + * "..". The "." reference is inconsequential since we are quashing + * it. We have removed the "." reference and the reference in the + * parent directory, but there may be other hard links. So, + * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no + * new entries are made. The soft dependency code will arrange to + * do these operations after the parent directory entry has been + * deleted on disk, so when running with that code we avoid doing + * them now. */ - ip->i_nlink -= 2; - error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); - cache_purge(ITOV(ip)); - VN_POLLEVENT(vp, POLLNLINK); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(vp)) { + dp->i_nlink--; + ip->i_nlink--; + error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + } + cache_purge(vp); out: - if (dvp) - vput(dvp); + vput(dvp); + VN_POLLEVENT(vp, POLLNLINK); vput(vp); return (error); } @@ -1974,7 +1978,7 @@ ufs_vinit(mntp, specops, fifoops, vpp) } if (ip->i_number == ROOTINO) - vp->v_flag |= VROOT; + vp->v_flag |= VROOT; /* * Initialize modrev times */ @@ -1995,6 +1999,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) struct componentname *cnp; { register struct inode *ip, *pdir; + struct direct newdir; struct timeval tv; struct vnode *tvp; int error; @@ -2078,7 +2083,10 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). 
*/ + ip->i_effnlink = 1; ip->i_nlink = 1; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) ip->i_mode &= ~ISGID; @@ -2090,10 +2098,11 @@ ufs_makeinode(mode, dvp, vpp, cnp) * Make sure inode goes to disk before directory entry. */ gettime(&tv); - error = UFS_UPDATE(tvp, &tv, &tv, 1); + error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp)); if (error) goto bad; - error = ufs_direnter(ip, dvp, cnp); + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); if (error) goto bad; @@ -2110,6 +2119,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) */ zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); + ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp);