Make GEOM RAID more aggressive in marking volumes as clean on shutdown

and move that action from the shutdown_pre_sync to the shutdown_post_sync stage
to avoid extra flapping.
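
For readers unfamiliar with the hook: shutdown_post_sync is a standard kernel
eventhandler that fires after the final filesystem sync, shortly before the
system halts.  A minimal sketch of registering and deregistering such a handler
follows; the example_* names are illustrative only, the real registration lives
in g_raid_init()/g_raid_fini() in the diff below.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>

static eventhandler_tag example_post_sync = NULL;

/* Runs after the final filesystem sync, shortly before the system halts. */
static void
example_shutdown(void *arg __unused, int howto __unused)
{
	/* 'arg' is the pointer passed at registration time. */
	/* Mark volumes clean, flush metadata, etc. */
}

static void
example_init(void *arg)
{
	/* post_sync fires later than pre_sync, so fewer writes can follow. */
	example_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    example_shutdown, arg, SHUTDOWN_PRI_FIRST);
	if (example_post_sync == NULL)
		printf("cannot register shutdown event\n");
}

static void
example_fini(void *arg __unused)
{
	if (example_post_sync != NULL)
		EVENTHANDLER_DEREGISTER(shutdown_post_sync, example_post_sync);
}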

ZFS tends not to close devices on shutdown, which prevents GEOM RAID from
shutting down gracefully.  To handle that, mark volumes as clean once
shutdown time comes and there are no active writes.
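
To make the intended behavior concrete, here is a small self-contained sketch
(illustrative only, not the driver code; struct volume and the helper below are
hypothetical) of the decision g_raid_clean() makes after this change: a dirty
volume with no writes in flight is normally cleaned only after an idle timeout,
but once shutdown is in progress the timeout is skipped.

#include <stdio.h>
#include <time.h>

/* Hypothetical, simplified model of one volume's state. */
struct volume {
	int	dirty;		/* on-disk metadata says "dirty" */
	int	writes;		/* writes currently in flight */
	time_t	last_write;	/* uptime of the last completed write */
};

static int clean_time = 5;	/* idle seconds before cleaning (cf. g_raid_clean_time) */
static int shutting_down;	/* set once the shutdown handler runs */

/* Returns 1 if the volume may be marked clean now, 0 otherwise. */
static int
may_mark_clean(const struct volume *vol, time_t now)
{
	if (!vol->dirty)
		return (0);	/* nothing to do */
	if (vol->writes > 0)
		return (0);	/* never clean under active writes */
	/*
	 * Normally wait for the idle timeout; once shutdown has started
	 * the timeout is ignored and the volume is cleaned immediately.
	 */
	if (!shutting_down && now - vol->last_write < clean_time)
		return (0);
	return (1);
}

int
main(void)
{
	struct volume vol = { .dirty = 1, .writes = 0, .last_write = 10 };

	printf("idle 1s, running:  %d\n", may_mark_clean(&vol, 11));	/* 0: timeout not reached */
	shutting_down = 1;
	printf("idle 1s, shutdown: %d\n", may_mark_clean(&vol, 11));	/* 1: timeout skipped */
	return (0);
}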

MFC after:	2 weeks
mav 2012-10-29 14:18:54 +00:00
parent 844d4d2472
commit fa229bcba8

@@ -108,8 +108,9 @@ LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes =
 LIST_HEAD(, g_raid_volume) g_raid_volumes =
     LIST_HEAD_INITIALIZER(g_raid_volumes);
 
-static eventhandler_tag g_raid_pre_sync = NULL;
+static eventhandler_tag g_raid_post_sync = NULL;
 static int g_raid_started = 0;
+static int g_raid_shutdown = 0;
 
 static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
@@ -881,7 +882,7 @@ g_raid_orphan(struct g_consumer *cp)
 	    G_RAID_EVENT_DISK);
 }
 
-static int
+static void
 g_raid_clean(struct g_raid_volume *vol, int acw)
 {
 	struct g_raid_softc *sc;
@@ -892,22 +893,21 @@ g_raid_clean(struct g_raid_volume *vol, int acw)
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 //	if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
-//		return (0);
+//		return;
 	if (!vol->v_dirty)
-		return (0);
+		return;
 	if (vol->v_writes > 0)
-		return (0);
+		return;
 	if (acw > 0 || (acw == -1 &&
 	    vol->v_provider != NULL && vol->v_provider->acw > 0)) {
 		timeout = g_raid_clean_time - (time_uptime - vol->v_last_write);
-		if (timeout > 0)
-			return (timeout);
+		if (!g_raid_shutdown && timeout > 0)
+			return;
 	}
 	vol->v_dirty = 0;
 	G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.",
 	    vol->v_name);
 	g_raid_write_metadata(sc, vol, NULL, NULL);
-	return (0);
 }
 
 static void
@@ -1520,8 +1520,7 @@ g_raid_worker(void *arg)
 			g_raid_disk_done_request(bp);
 		} else if (rv == EWOULDBLOCK) {
 			TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
-				if (vol->v_writes == 0 && vol->v_dirty)
-					g_raid_clean(vol, -1);
+				g_raid_clean(vol, -1);
 				if (bioq_first(&vol->v_inflight) == NULL &&
 				    vol->v_tr) {
 					t.tv_sec = g_raid_idle_threshold / 1000000;
@@ -1783,7 +1782,7 @@ g_raid_access(struct g_provider *pp, int acr, int acw, int ace)
 		error = ENXIO;
 		goto out;
 	}
-	if (dcw == 0 && vol->v_dirty)
+	if (dcw == 0)
 		g_raid_clean(vol, dcw);
 	vol->v_provider_open += acr + acw + ace;
 	/* Handle delayed node destruction. */
@@ -2379,21 +2378,25 @@ g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 }
 
 static void
-g_raid_shutdown_pre_sync(void *arg, int howto)
+g_raid_shutdown_post_sync(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 	struct g_raid_softc *sc;
+	struct g_raid_volume *vol;
 	int error;
 
 	mp = arg;
 	DROP_GIANT();
 	g_topology_lock();
+	g_raid_shutdown = 1;
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 		if ((sc = gp->softc) == NULL)
 			continue;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
+		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next)
+			g_raid_clean(vol, -1);
 		g_cancel_event(sc);
 		error = g_raid_destroy(sc, G_RAID_DESTROY_DELAYED);
 		if (error != 0)
@@ -2408,9 +2411,9 @@ static void
 g_raid_init(struct g_class *mp)
 {
 
-	g_raid_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
-	    g_raid_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
-	if (g_raid_pre_sync == NULL)
+	g_raid_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
+	    g_raid_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
+	if (g_raid_post_sync == NULL)
 		G_RAID_DEBUG(0, "Warning! Cannot register shutdown event.");
 	g_raid_started = 1;
 }
@@ -2419,8 +2422,8 @@ static void
 g_raid_fini(struct g_class *mp)
 {
 
-	if (g_raid_pre_sync != NULL)
-		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid_pre_sync);
+	if (g_raid_post_sync != NULL)
+		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid_post_sync);
 	g_raid_started = 0;
 }