Suspend all writeable local filesystems on power suspend.

This ensures that no writes are pending in memory, either metadata or
user data, but not including dirty pages not yet converted to fs writes.

Only filesystems declared local are suspended.

Note that this does not guarantee the absence of metadata errors or
leaks if the resume is never performed: for instance, on UFS, unlinked but
still-open inodes are leaked and require fsck to garbage-collect them.

Reviewed by:	markj
Discussed with:	imp
Tested by:	imp (previous version), pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D27054
This commit is contained in:
Konstantin Belousov 2020-11-05 20:52:49 +00:00
parent 6a32dae2b7
commit f10845877e
4 changed files with 74 additions and 0 deletions

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/ctype.h>
#include <sys/linker.h>
#include <sys/mount.h>
#include <sys/power.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
@ -3081,6 +3082,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
EVENTHANDLER_INVOKE(power_suspend_early);
stop_all_proc();
suspend_all_fs();
EVENTHANDLER_INVOKE(power_suspend);
#ifdef EARLY_AP_STARTUP
@ -3240,6 +3242,7 @@ backout:
}
#endif
resume_all_fs();
resume_all_proc();
EVENTHANDLER_INVOKE(power_resume);

View File

@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$");
#include <sys/filedesc.h>
#include <sys/kdb.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
@ -204,6 +205,7 @@ xctrl_suspend()
xs_lock();
stop_all_proc();
xs_unlock();
suspend_all_fs();
EVENTHANDLER_INVOKE(power_suspend);
#ifdef EARLY_AP_STARTUP
@ -317,6 +319,7 @@ xctrl_suspend()
}
#endif
resume_all_fs();
resume_all_proc();
EVENTHANDLER_INVOKE(power_resume);

View File

@ -2507,3 +2507,67 @@ mount_devctl_event(const char *type, struct mount *mp, bool donew)
sbuf_delete(&sb);
free(buf, M_MOUNT);
}
/*
 * Suspend writes on every local, writeable filesystem, fully syncing
 * each one in the process.
 *
 * The mount list is walked tail-to-head so that the most recently
 * mounted filesystems are suspended first.  This ordering covers the
 * case of a filesystem mounted from a vnode-backed md(4) device, which
 * must be suspended before the filesystem that owns the backing vnode.
 *
 * A filesystem that was suspended here is tagged with MNTK_SUSPEND_ALL
 * and is left busied; resume_all_fs() looks for that tag, resumes the
 * filesystem and drops the busy reference.  Filesystems that cannot be
 * busied without waiting, are read-only or non-local, or are already
 * suspended (MNTK_SUSPEND set) are left untouched.
 */
void
suspend_all_fs(void)
{
	struct mount *mp;
	int error;

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
		/* Could not busy the mount without sleeping: skip it. */
		if (vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT) != 0)
			continue;

		/*
		 * From here on the code re-takes mountlist_mtx before
		 * moving to the next entry, i.e. a successful vfs_busy()
		 * with MBF_MNTLSTLOCK is expected to have released it.
		 */
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) == MNT_LOCAL &&
		    (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
			error = vfs_write_suspend(mp, 0);
			if (error == 0) {
				/*
				 * Remember that this suspension belongs to
				 * us and keep the mount busied until resume.
				 */
				MNT_ILOCK(mp);
				MPASS((mp->mnt_kern_flag &
				    MNTK_SUSPEND_ALL) == 0);
				mp->mnt_kern_flag |= MNTK_SUSPEND_ALL;
				MNT_IUNLOCK(mp);
				mtx_lock(&mountlist_mtx);
				continue;
			}
			printf("suspend of %s failed, error %d\n",
			    mp->mnt_stat.f_mntonname, error);
		}

		/*
		 * Not eligible, or the suspend attempt failed: give the
		 * busy reference back under the mount list lock.
		 */
		mtx_lock(&mountlist_mtx);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
}
/*
 * Undo suspend_all_fs(): for every mount tagged with MNTK_SUSPEND_ALL,
 * clear the tag, resume writes, and release the busy reference that was
 * kept across the suspension.  Mounts without the tag are not touched.
 */
void
resume_all_fs(void)
{
	struct mount *mp;

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) != 0) {
			/*
			 * The mount list lock is dropped around the flag
			 * update and the resume, and re-taken before the
			 * unbusy and the step to the next list entry.
			 */
			mtx_unlock(&mountlist_mtx);

			MNT_ILOCK(mp);
			MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0);
			mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL;
			MNT_IUNLOCK(mp);

			vfs_write_resume(mp, 0);

			mtx_lock(&mountlist_mtx);
			vfs_unbusy(mp);
		}
	}
	mtx_unlock(&mountlist_mtx);
}

View File

@ -463,6 +463,7 @@ struct mntoptnames {
#define MNTK_VMSETSIZE_BUG 0x00010000
#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */
#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */
#define MNTK_SUSPEND_ALL 0x00080000 /* Suspended by all-fs suspension */
#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
@ -1048,6 +1049,9 @@ void vfs_dump_mount_counters(struct mount *);
enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT };
int vfs_mount_fetch_counter(struct mount *, enum mount_counter);
/*
 * Suspend and resume write operations on all local writeable
 * filesystems; called around power suspend.
 */
void suspend_all_fs(void);
void resume_all_fs(void);
/*
* Code transitioning mnt_vfs_ops to > 0 issues IPIs until it observes
* all CPUs not executing code enclosed by mnt_thread_in_ops_pcpu.