From ec79f43b0ff90fd24dee5391604ec49eb751347f Mon Sep 17 00:00:00 2001 From: kib Date: Thu, 5 Nov 2020 20:52:49 +0000 Subject: [PATCH] Suspend all writeable local filesystems on power suspend. This ensures that no writes are pending in memory, either metadata or user data, but not including dirty pages not yet converted to fs writes. Only filesystems declared local are suspended. Note that this does not guarantee absence of the metadata errors or leaks if resume is not done: for instance, on UFS unlinked but opened inodes are leaked and require fsck to gc. Reviewed by: markj Discussed with: imp Tested by: imp (previous version), pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D27054 --- sys/dev/acpica/acpi.c | 3 ++ sys/dev/xen/control/control.c | 3 ++ sys/kern/vfs_mount.c | 64 +++++++++++++++++++++++++++++++++++ sys/sys/mount.h | 4 +++ 4 files changed, 74 insertions(+) diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index 6e8e12e42f75..55075828a224 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -3081,6 +3082,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) EVENTHANDLER_INVOKE(power_suspend_early); stop_all_proc(); + suspend_all_fs(); EVENTHANDLER_INVOKE(power_suspend); #ifdef EARLY_AP_STARTUP @@ -3240,6 +3242,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) } #endif + resume_all_fs(); resume_all_proc(); EVENTHANDLER_INVOKE(power_resume); diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 8ad398bf986b..e985d56cda5a 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -204,6 +205,7 @@ xctrl_suspend() xs_lock(); stop_all_proc(); xs_unlock(); + suspend_all_fs(); EVENTHANDLER_INVOKE(power_suspend); #ifdef EARLY_AP_STARTUP @@ -317,6 +319,7 @@ xctrl_suspend() } #endif + resume_all_fs(); resume_all_proc(); EVENTHANDLER_INVOKE(power_resume); diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 08190d432a93..7ab3b6274491 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -2507,3 +2507,67 @@ mount_devctl_event(const char *type, struct mount *mp, bool donew) sbuf_delete(&sb); free(buf, M_MOUNT); } + +/* + * Suspend write operations on all local writeable filesystems. Does + * full sync of them in the process. + * + * Iterate over the mount points in reverse order, suspending most + * recently mounted filesystems first. It handles a case where a + * filesystem mounted from a md(4) vnode-backed device should be + * suspended before the filesystem that owns the vnode. + */ +void +suspend_all_fs(void) +{ + struct mount *mp; + int error; + + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { + error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT); + if (error != 0) + continue; + if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) != MNT_LOCAL || + (mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { + mtx_lock(&mountlist_mtx); + vfs_unbusy(mp); + continue; + } + error = vfs_write_suspend(mp, 0); + if (error == 0) { + MNT_ILOCK(mp); + MPASS((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0); + mp->mnt_kern_flag |= MNTK_SUSPEND_ALL; + MNT_IUNLOCK(mp); + mtx_lock(&mountlist_mtx); + } else { + printf("suspend of %s failed, error %d\n", + mp->mnt_stat.f_mntonname, error); + mtx_lock(&mountlist_mtx); + vfs_unbusy(mp); + } + } + mtx_unlock(&mountlist_mtx); +} + +void +resume_all_fs(void) +{ + struct mount *mp; + + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0) + continue; + mtx_unlock(&mountlist_mtx); + MNT_ILOCK(mp); + MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0); + mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL; + MNT_IUNLOCK(mp); + vfs_write_resume(mp, 0); + mtx_lock(&mountlist_mtx); + vfs_unbusy(mp); + } + mtx_unlock(&mountlist_mtx); +} diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 06646f80d08b..ee8d916c2432 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -463,6 +463,7 @@ struct mntoptnames { #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ #define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ +#define MNTK_SUSPEND_ALL 0x00080000 /* Suspended by all-fs suspension */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ @@ -1048,6 +1049,9 @@ void vfs_dump_mount_counters(struct mount *); enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT }; int vfs_mount_fetch_counter(struct mount *, enum mount_counter); +void suspend_all_fs(void); +void resume_all_fs(void); + /* * Code transitioning mnt_vfs_ops to > 0 issues IPIs until it observes * all CPUs not executing code enclosed by mnt_thread_in_ops_pcpu.